snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -324,18 +324,24 @@ class SequentialFeatureSelector(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
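This docstring change reflects a behavioral change that repeats across every generated estimator in 1.4.0: _batch_inference_validate_snowpark now returns the packages it resolved against the Snowflake Anaconda channel, and the score hunk further down caches that list on self._deps instead of re-deriving dependencies. Below is a minimal, self-contained sketch of the validate-then-cache pattern; the class, resolved package names, and dataset are illustrative stand-ins, not the actual snowflake-ml internals.

from typing import Any, List


class FakeEstimator:
    """Illustrative stand-in for a generated snowflake-ml estimator."""

    def __init__(self) -> None:
        self._is_fitted = True
        self._deps: List[str] = []

    def _batch_inference_validate_snowpark(self, dataset: Any, inference_method: str) -> List[str]:
        # The real method also checks that the dataset's session is not None and
        # resolves the estimator's requirements against the Snowflake Anaconda
        # channel; this stub returns an assumed result.
        if not self._is_fitted:
            raise RuntimeError(f"Estimator must be fitted before calling {inference_method!r}.")
        return ["scikit-learn", "numpy"]

    def score(self, dataset: Any) -> None:
        # 1.4.0 pattern: validate once, cache the resolved packages, then reuse
        # them when declaring dependencies for the scoring stored procedure.
        self._deps = self._batch_inference_validate_snowpark(dataset, inference_method="score")
        dependencies = ["snowflake-snowpark-python"] + self._deps
        print(dependencies)  # ['snowflake-snowpark-python', 'scikit-learn', 'numpy']


FakeEstimator().score(dataset=None)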
@@ -407,7 +413,7 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

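The kwargs change above is the other recurring theme of this release: instead of materializing the pass-through column list on the client via the now-deleted _get_pass_through_columns, the caller forwards the drop_input_cols flag and lets the inference handler derive the columns itself. The helper below restates the deleted logic so the two call shapes can be compared; the sample column names are made up.

from typing import List


def old_pass_through_cols(dataset_columns: List[str], output_cols: List[str], drop_input_cols: bool) -> List[str]:
    # Restatement of the removed _get_pass_through_columns: nothing passes
    # through when input columns are dropped; otherwise every dataset column
    # that is not an output column does.
    if drop_input_cols:
        return []
    return list(set(dataset_columns) - set(output_cols))


# Old call site: pass_through_cols=old_pass_through_cols(cols, outputs, flag)
# New call site: drop_input_cols=flag  (the handler computes the rest itself)
print(sorted(old_pass_through_cols(["ID", "F1", "OUTPUT"], ["OUTPUT"], drop_input_cols=False)))  # ['F1', 'ID']
print(old_pass_through_cols(["ID", "F1", "OUTPUT"], ["OUTPUT"], drop_input_cols=True))  # []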
@@ -469,16 +475,16 @@ class SequentialFeatureSelector(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"

             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
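The only textual change in the hunk above is the literal "ARRAY" becoming lowercase "array", but the surrounding inference logic is worth restating because it repeats in every generated transformer. Here is a condensed sketch of that branch under stated assumptions; the stub object stands in for the wrapped scikit-learn estimator, and the None-sentinel checks approximate the hasattr/getattr pair used in the real code.

def infer_expected_dtype(sklearn_object: object, n_output_cols: int, factory_dtype: str) -> str:
    # Condensed restatement of the branch above. When the factory left the type
    # blank, transforms that emit more values than declared output columns
    # (clustering with n_clusters, decomposition with n_components) must pack
    # each row into a single "array" column.
    expected_dtype = factory_dtype
    if expected_dtype == "":
        n_clusters = getattr(sklearn_object, "n_clusters", None)
        n_components = getattr(sklearn_object, "n_components", None)
        if n_clusters is not None and n_clusters != n_output_cols:
            expected_dtype = "array"
        elif n_components is not None and n_components != n_output_cols:
            expected_dtype = "array"
        # else: the real code infers the type from the input signature.
    return expected_dtype


class _StubKMeans:
    n_clusters = 8


print(infer_expected_dtype(_StubKMeans(), n_output_cols=1, factory_dtype=""))  # array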
@@ -496,7 +502,7 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -547,7 +553,7 @@ class SequentialFeatureSelector(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-                pass_through_columns=self._get_pass_through_columns(dataset),
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
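The trainer protocol changed in the same direction, as the hunk above shows: train_fit_predict now accepts drop_input_cols rather than a precomputed pass_through_columns list (this pairs with the model_trainer.py and trainer-implementation changes in the file list). A hedged stub of the new call shape follows, with the trainer internals omitted and all return values invented for illustration.

from typing import List, Tuple


class StubModelTrainer:
    """Illustrative stand-in for the ModelTrainer protocol; not the real class."""

    def train_fit_predict(self, drop_input_cols: bool, expected_output_cols_list: List[str]) -> Tuple[object, object]:
        # Real trainers fit the estimator (possibly inside a stored procedure)
        # and return the prediction result together with the fitted estimator.
        fitted_estimator = object()
        output_result = f"rows with columns {expected_output_cols_list}, inputs dropped={drop_input_cols}"
        return output_result, fitted_estimator


output_result, fitted_estimator = StubModelTrainer().train_fit_predict(
    drop_input_cols=False,
    expected_output_cols_list=["OUTPUT_1"],
)
print(output_result)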
@@ -565,44 +571,6 @@ class SequentialFeatureSelector(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -642,7 +610,7 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -707,7 +675,7 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -768,7 +736,7 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -833,7 +801,7 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -887,13 +855,17 @@ class SequentialFeatureSelector(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session) # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -967,9 +939,9 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -257,18 +257,24 @@ class VarianceThreshold(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -340,7 +346,7 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

@@ -402,16 +408,16 @@ class VarianceThreshold(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"

             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -429,7 +435,7 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -480,7 +486,7 @@ class VarianceThreshold(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-                pass_through_columns=self._get_pass_through_columns(dataset),
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -498,44 +504,6 @@ class VarianceThreshold(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -575,7 +543,7 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -640,7 +608,7 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -701,7 +669,7 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -766,7 +734,7 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -820,13 +788,17 @@ class VarianceThreshold(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session) # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -900,9 +872,9 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -51,8 +51,8 @@ class Base:
             input_cols: Input columns.
             output_cols: Output columns.
             label_cols: Label column(s).
-            passthrough_cols: List columns not to be used or modified by the estimator/trasformers.
-                These columns will be passed through all the estimator/trasformer operations without any modifications.
+            passthrough_cols: List columns not to be used or modified by the estimator/transformers.
+                These columns will be passed through all the estimator/transformer operations without any modifications.
         """
         self.input_cols: List[str] = []
         self.output_cols: List[str] = []
@@ -185,7 +185,10 @@ class Base:
                error_code=error_codes.INVALID_ATTRIBUTE,
                original_exception=RuntimeError(
                    modeling_error_messages.SIZE_MISMATCH.format(
-                        "input_cols", len(self.input_cols), "output_cols", len(self.output_cols)
+                        "input_cols",
+                        len(self.input_cols),
+                        "output_cols",
+                        len(self.output_cols),
                    )
                ),
            )
@@ -498,7 +501,11 @@ class BaseTransformer(BaseEstimator):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         """Base class for all transformers."""
-        super().__init__(file_names=file_names, custom_states=custom_states, sample_weight_col=sample_weight_col)
+        super().__init__(
+            file_names=file_names,
+            custom_states=custom_states,
+            sample_weight_col=sample_weight_col,
+        )
         self._sklearn_object = None
         self._is_fitted = False
         self._drop_input_cols = drop_input_cols
@@ -577,6 +584,46 @@ class BaseTransformer(BaseEstimator):
                ),
            )

+    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
+        """Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
+
+        Args:
+            output_cols_prefix: the prefix for output cols, such as its inference method.
+            output_cols: The output cols. Defaults to None. This is introduced by kneighbors methods
+
+        Returns:
+            inferred output column names
+        """
+        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
+        if output_cols:
+            output_cols = [
+                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)]) for c in output_cols
+            ]
+        elif getattr(self._sklearn_object, "classes_", None) is None:
+            output_cols = [output_cols_prefix]
+        elif self._sklearn_object is not None:
+            classes = self._sklearn_object.classes_
+            if isinstance(classes, np.ndarray):
+                output_cols = [f"{output_cols_prefix}{str(c)}" for c in classes.tolist()]
+            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], np.ndarray):
+                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
+                output_cols = []
+                for i, cl in enumerate(classes):
+                    # For binary classification, there is only one output column for each class
+                    # ndarray as the two classes are complementary.
+                    if len(cl) == 2:
+                        output_cols.append(f"{output_cols_prefix}{i}_{cl[0]}")
+                    else:
+                        output_cols.extend([f"{output_cols_prefix}{i}_{c}" for c in cl.tolist()])
+        else:
+            output_cols = []
+
+        # Make sure column names are valid snowflake identifiers.
+        assert output_cols is not None  # Make MyPy happy
+        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
+        return rv
+
     def set_drop_input_cols(self, drop_input_cols: Optional[bool] = False) -> None:
         self._drop_input_cols = drop_input_cols

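Since _get_output_column_names now lives once on BaseTransformer (replacing the per-estimator copies deleted in the earlier hunks), a worked example of its naming rules may help. The sketch uses plain f-strings in place of the identifier helpers, which additionally resolve and validate Snowflake identifiers; the classes and prefix are invented for the example.

import numpy as np

prefix = "PREDICT_PROBA_"

# Single-output classifier: one column per class value.
classes = np.array([0, 1, 2])
print([f"{prefix}{c}" for c in classes.tolist()])
# ['PREDICT_PROBA_0', 'PREDICT_PROBA_1', 'PREDICT_PROBA_2']

# Multioutput classifier: classes_ is a list of ndarrays. Binary outputs get a
# single column each (the two class probabilities are complementary); wider
# outputs get one column per class.
multi = [np.array([0, 1]), np.array(["a", "b", "c"])]
cols = []
for i, cl in enumerate(multi):
    if len(cl) == 2:
        cols.append(f"{prefix}{i}_{cl[0]}")
    else:
        cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
print(cols)
# ['PREDICT_PROBA_0_0', 'PREDICT_PROBA_1_a', 'PREDICT_PROBA_1_b', 'PREDICT_PROBA_1_c']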
@@ -665,7 +712,10 @@ class BaseTransformer(BaseEstimator):
                error_code=error_codes.INVALID_ATTRIBUTE,
                original_exception=RuntimeError(
                    modeling_error_messages.SIZE_MISMATCH.format(
-                        "output_cols", len(self.output_cols), "transformed array shape", shape
+                        "output_cols",
+                        len(self.output_cols),
+                        "transformed array shape",
+                        shape,
                    )
                ),
            )
@@ -352,18 +352,24 @@ class GaussianProcessClassifier(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -437,7 +443,7 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

@@ -497,16 +503,16 @@ class GaussianProcessClassifier(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"

             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -524,7 +530,7 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -575,7 +581,7 @@ class GaussianProcessClassifier(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-                pass_through_columns=self._get_pass_through_columns(dataset),
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -593,44 +599,6 @@ class GaussianProcessClassifier(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -672,7 +640,7 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -739,7 +707,7 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -800,7 +768,7 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -865,7 +833,7 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -921,13 +889,17 @@ class GaussianProcessClassifier(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session) # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1001,9 +973,9 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):