snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -334,18 +334,24 @@ class KNeighborsClassifier(BaseTransformer):
334
334
  self._get_model_signatures(dataset)
335
335
  return self
336
336
 
337
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
338
- if self._drop_input_cols:
339
- return []
340
- else:
341
- return list(set(dataset.columns) - set(self.output_cols))
342
-
343
337
  def _batch_inference_validate_snowpark(
344
338
  self,
345
339
  dataset: DataFrame,
346
340
  inference_method: str,
347
341
  ) -> List[str]:
348
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
342
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
343
+ return the available package that exists in the snowflake anaconda channel
344
+
345
+ Args:
346
+ dataset: snowpark dataframe
347
+ inference_method: the inference method such as predict, score...
348
+
349
+ Raises:
350
+ SnowflakeMLException: If the estimator is not fitted, raise error
351
+ SnowflakeMLException: If the session is None, raise error
352
+
353
+ Returns:
354
+ A list of available package that exists in the snowflake anaconda channel
349
355
  """
350
356
  if not self._is_fitted:
351
357
  raise exceptions.SnowflakeMLException(
@@ -419,7 +425,7 @@ class KNeighborsClassifier(BaseTransformer):
419
425
  transform_kwargs = dict(
420
426
  session = dataset._session,
421
427
  dependencies = self._deps,
422
- pass_through_cols = self._get_pass_through_columns(dataset),
428
+ drop_input_cols = self._drop_input_cols,
423
429
  expected_output_cols_type = expected_type_inferred,
424
430
  )
425
431
 
@@ -479,16 +485,16 @@ class KNeighborsClassifier(BaseTransformer):
479
485
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
480
486
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
481
487
  # each row containing a list of values.
482
- expected_dtype = "ARRAY"
488
+ expected_dtype = "array"
483
489
 
484
490
  # If we were unable to assign a type to this transform in the factory, infer the type here.
485
491
  if expected_dtype == "":
486
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
492
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
487
493
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
488
- expected_dtype = "ARRAY"
489
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
494
+ expected_dtype = "array"
495
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
490
496
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
491
- expected_dtype = "ARRAY"
497
+ expected_dtype = "array"
492
498
  else:
493
499
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
494
500
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -506,7 +512,7 @@ class KNeighborsClassifier(BaseTransformer):
506
512
  transform_kwargs = dict(
507
513
  session = dataset._session,
508
514
  dependencies = self._deps,
509
- pass_through_cols = self._get_pass_through_columns(dataset),
515
+ drop_input_cols = self._drop_input_cols,
510
516
  expected_output_cols_type = expected_dtype,
511
517
  )
512
518
 
@@ -557,7 +563,7 @@ class KNeighborsClassifier(BaseTransformer):
557
563
  subproject=_SUBPROJECT,
558
564
  )
559
565
  output_result, fitted_estimator = model_trainer.train_fit_predict(
560
- pass_through_columns=self._get_pass_through_columns(dataset),
566
+ drop_input_cols=self._drop_input_cols,
561
567
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
562
568
  )
563
569
  self._sklearn_object = fitted_estimator
@@ -575,44 +581,6 @@ class KNeighborsClassifier(BaseTransformer):
575
581
  assert self._sklearn_object is not None
576
582
  return self._sklearn_object.embedding_
577
583
 
578
-
579
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
580
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
581
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
582
- """
583
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
584
- if output_cols:
585
- output_cols = [
586
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
587
- for c in output_cols
588
- ]
589
- elif getattr(self._sklearn_object, "classes_", None) is None:
590
- output_cols = [output_cols_prefix]
591
- elif self._sklearn_object is not None:
592
- classes = self._sklearn_object.classes_
593
- if isinstance(classes, numpy.ndarray):
594
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
595
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
596
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
597
- output_cols = []
598
- for i, cl in enumerate(classes):
599
- # For binary classification, there is only one output column for each class
600
- # ndarray as the two classes are complementary.
601
- if len(cl) == 2:
602
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
603
- else:
604
- output_cols.extend([
605
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
606
- ])
607
- else:
608
- output_cols = []
609
-
610
- # Make sure column names are valid snowflake identifiers.
611
- assert output_cols is not None # Make MyPy happy
612
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
613
-
614
- return rv
615
-
616
584
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
617
585
  @telemetry.send_api_usage_telemetry(
618
586
  project=_PROJECT,
@@ -654,7 +622,7 @@ class KNeighborsClassifier(BaseTransformer):
654
622
  transform_kwargs = dict(
655
623
  session=dataset._session,
656
624
  dependencies=self._deps,
657
- pass_through_cols=self._get_pass_through_columns(dataset),
625
+ drop_input_cols = self._drop_input_cols,
658
626
  expected_output_cols_type="float",
659
627
  )
660
628
 
@@ -721,7 +689,7 @@ class KNeighborsClassifier(BaseTransformer):
721
689
  transform_kwargs = dict(
722
690
  session=dataset._session,
723
691
  dependencies=self._deps,
724
- pass_through_cols=self._get_pass_through_columns(dataset),
692
+ drop_input_cols = self._drop_input_cols,
725
693
  expected_output_cols_type="float",
726
694
  )
727
695
  elif isinstance(dataset, pd.DataFrame):
@@ -782,7 +750,7 @@ class KNeighborsClassifier(BaseTransformer):
782
750
  transform_kwargs = dict(
783
751
  session=dataset._session,
784
752
  dependencies=self._deps,
785
- pass_through_cols=self._get_pass_through_columns(dataset),
753
+ drop_input_cols = self._drop_input_cols,
786
754
  expected_output_cols_type="float",
787
755
  )
788
756
 
@@ -847,7 +815,7 @@ class KNeighborsClassifier(BaseTransformer):
847
815
  transform_kwargs = dict(
848
816
  session=dataset._session,
849
817
  dependencies=self._deps,
850
- pass_through_cols=self._get_pass_through_columns(dataset),
818
+ drop_input_cols = self._drop_input_cols,
851
819
  expected_output_cols_type="float",
852
820
  )
853
821
 
@@ -903,13 +871,17 @@ class KNeighborsClassifier(BaseTransformer):
903
871
  transform_kwargs: ScoreKwargsTypedDict = dict()
904
872
 
905
873
  if isinstance(dataset, DataFrame):
874
+ self._deps = self._batch_inference_validate_snowpark(
875
+ dataset=dataset,
876
+ inference_method="score",
877
+ )
906
878
  selected_cols = self._get_active_columns()
907
879
  if len(selected_cols) > 0:
908
880
  dataset = dataset.select(selected_cols)
909
881
  assert isinstance(dataset._session, Session) # keep mypy happy
910
882
  transform_kwargs = dict(
911
883
  session=dataset._session,
912
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
884
+ dependencies=["snowflake-snowpark-python"] + self._deps,
913
885
  score_sproc_imports=['sklearn'],
914
886
  )
915
887
  elif isinstance(dataset, pd.DataFrame):
@@ -985,9 +957,9 @@ class KNeighborsClassifier(BaseTransformer):
985
957
  transform_kwargs = dict(
986
958
  session = dataset._session,
987
959
  dependencies = self._deps,
988
- pass_through_cols = self._get_pass_through_columns(dataset),
989
- expected_output_cols_type = "array",
990
- n_neighbors = n_neighbors,
960
+ drop_input_cols = self._drop_input_cols,
961
+ expected_output_cols_type="array",
962
+ n_neighbors = n_neighbors,
991
963
  return_distance = return_distance
992
964
  )
993
965
  elif isinstance(dataset, pd.DataFrame):
@@ -336,18 +336,24 @@ class KNeighborsRegressor(BaseTransformer):
336
336
  self._get_model_signatures(dataset)
337
337
  return self
338
338
 
339
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
340
- if self._drop_input_cols:
341
- return []
342
- else:
343
- return list(set(dataset.columns) - set(self.output_cols))
344
-
345
339
  def _batch_inference_validate_snowpark(
346
340
  self,
347
341
  dataset: DataFrame,
348
342
  inference_method: str,
349
343
  ) -> List[str]:
350
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
344
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
345
+ return the available package that exists in the snowflake anaconda channel
346
+
347
+ Args:
348
+ dataset: snowpark dataframe
349
+ inference_method: the inference method such as predict, score...
350
+
351
+ Raises:
352
+ SnowflakeMLException: If the estimator is not fitted, raise error
353
+ SnowflakeMLException: If the session is None, raise error
354
+
355
+ Returns:
356
+ A list of available package that exists in the snowflake anaconda channel
351
357
  """
352
358
  if not self._is_fitted:
353
359
  raise exceptions.SnowflakeMLException(
@@ -421,7 +427,7 @@ class KNeighborsRegressor(BaseTransformer):
421
427
  transform_kwargs = dict(
422
428
  session = dataset._session,
423
429
  dependencies = self._deps,
424
- pass_through_cols = self._get_pass_through_columns(dataset),
430
+ drop_input_cols = self._drop_input_cols,
425
431
  expected_output_cols_type = expected_type_inferred,
426
432
  )
427
433
 
@@ -481,16 +487,16 @@ class KNeighborsRegressor(BaseTransformer):
481
487
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
482
488
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
483
489
  # each row containing a list of values.
484
- expected_dtype = "ARRAY"
490
+ expected_dtype = "array"
485
491
 
486
492
  # If we were unable to assign a type to this transform in the factory, infer the type here.
487
493
  if expected_dtype == "":
488
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
494
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
489
495
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
490
- expected_dtype = "ARRAY"
491
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
496
+ expected_dtype = "array"
497
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
492
498
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
493
- expected_dtype = "ARRAY"
499
+ expected_dtype = "array"
494
500
  else:
495
501
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
496
502
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -508,7 +514,7 @@ class KNeighborsRegressor(BaseTransformer):
508
514
  transform_kwargs = dict(
509
515
  session = dataset._session,
510
516
  dependencies = self._deps,
511
- pass_through_cols = self._get_pass_through_columns(dataset),
517
+ drop_input_cols = self._drop_input_cols,
512
518
  expected_output_cols_type = expected_dtype,
513
519
  )
514
520
 
@@ -559,7 +565,7 @@ class KNeighborsRegressor(BaseTransformer):
559
565
  subproject=_SUBPROJECT,
560
566
  )
561
567
  output_result, fitted_estimator = model_trainer.train_fit_predict(
562
- pass_through_columns=self._get_pass_through_columns(dataset),
568
+ drop_input_cols=self._drop_input_cols,
563
569
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
564
570
  )
565
571
  self._sklearn_object = fitted_estimator
@@ -577,44 +583,6 @@ class KNeighborsRegressor(BaseTransformer):
577
583
  assert self._sklearn_object is not None
578
584
  return self._sklearn_object.embedding_
579
585
 
580
-
581
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
582
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
583
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
584
- """
585
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
586
- if output_cols:
587
- output_cols = [
588
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
589
- for c in output_cols
590
- ]
591
- elif getattr(self._sklearn_object, "classes_", None) is None:
592
- output_cols = [output_cols_prefix]
593
- elif self._sklearn_object is not None:
594
- classes = self._sklearn_object.classes_
595
- if isinstance(classes, numpy.ndarray):
596
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
597
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
598
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
599
- output_cols = []
600
- for i, cl in enumerate(classes):
601
- # For binary classification, there is only one output column for each class
602
- # ndarray as the two classes are complementary.
603
- if len(cl) == 2:
604
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
605
- else:
606
- output_cols.extend([
607
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
608
- ])
609
- else:
610
- output_cols = []
611
-
612
- # Make sure column names are valid snowflake identifiers.
613
- assert output_cols is not None # Make MyPy happy
614
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
615
-
616
- return rv
617
-
618
586
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
619
587
  @telemetry.send_api_usage_telemetry(
620
588
  project=_PROJECT,
@@ -654,7 +622,7 @@ class KNeighborsRegressor(BaseTransformer):
654
622
  transform_kwargs = dict(
655
623
  session=dataset._session,
656
624
  dependencies=self._deps,
657
- pass_through_cols=self._get_pass_through_columns(dataset),
625
+ drop_input_cols = self._drop_input_cols,
658
626
  expected_output_cols_type="float",
659
627
  )
660
628
 
@@ -719,7 +687,7 @@ class KNeighborsRegressor(BaseTransformer):
719
687
  transform_kwargs = dict(
720
688
  session=dataset._session,
721
689
  dependencies=self._deps,
722
- pass_through_cols=self._get_pass_through_columns(dataset),
690
+ drop_input_cols = self._drop_input_cols,
723
691
  expected_output_cols_type="float",
724
692
  )
725
693
  elif isinstance(dataset, pd.DataFrame):
@@ -780,7 +748,7 @@ class KNeighborsRegressor(BaseTransformer):
780
748
  transform_kwargs = dict(
781
749
  session=dataset._session,
782
750
  dependencies=self._deps,
783
- pass_through_cols=self._get_pass_through_columns(dataset),
751
+ drop_input_cols = self._drop_input_cols,
784
752
  expected_output_cols_type="float",
785
753
  )
786
754
 
@@ -845,7 +813,7 @@ class KNeighborsRegressor(BaseTransformer):
845
813
  transform_kwargs = dict(
846
814
  session=dataset._session,
847
815
  dependencies=self._deps,
848
- pass_through_cols=self._get_pass_through_columns(dataset),
816
+ drop_input_cols = self._drop_input_cols,
849
817
  expected_output_cols_type="float",
850
818
  )
851
819
 
@@ -901,13 +869,17 @@ class KNeighborsRegressor(BaseTransformer):
901
869
  transform_kwargs: ScoreKwargsTypedDict = dict()
902
870
 
903
871
  if isinstance(dataset, DataFrame):
872
+ self._deps = self._batch_inference_validate_snowpark(
873
+ dataset=dataset,
874
+ inference_method="score",
875
+ )
904
876
  selected_cols = self._get_active_columns()
905
877
  if len(selected_cols) > 0:
906
878
  dataset = dataset.select(selected_cols)
907
879
  assert isinstance(dataset._session, Session) # keep mypy happy
908
880
  transform_kwargs = dict(
909
881
  session=dataset._session,
910
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
882
+ dependencies=["snowflake-snowpark-python"] + self._deps,
911
883
  score_sproc_imports=['sklearn'],
912
884
  )
913
885
  elif isinstance(dataset, pd.DataFrame):
@@ -983,9 +955,9 @@ class KNeighborsRegressor(BaseTransformer):
983
955
  transform_kwargs = dict(
984
956
  session = dataset._session,
985
957
  dependencies = self._deps,
986
- pass_through_cols = self._get_pass_through_columns(dataset),
987
- expected_output_cols_type = "array",
988
- n_neighbors = n_neighbors,
958
+ drop_input_cols = self._drop_input_cols,
959
+ expected_output_cols_type="array",
960
+ n_neighbors = n_neighbors,
989
961
  return_distance = return_distance
990
962
  )
991
963
  elif isinstance(dataset, pd.DataFrame):
@@ -313,18 +313,24 @@ class KernelDensity(BaseTransformer):
313
313
  self._get_model_signatures(dataset)
314
314
  return self
315
315
 
316
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
317
- if self._drop_input_cols:
318
- return []
319
- else:
320
- return list(set(dataset.columns) - set(self.output_cols))
321
-
322
316
  def _batch_inference_validate_snowpark(
323
317
  self,
324
318
  dataset: DataFrame,
325
319
  inference_method: str,
326
320
  ) -> List[str]:
327
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
321
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
322
+ return the available package that exists in the snowflake anaconda channel
323
+
324
+ Args:
325
+ dataset: snowpark dataframe
326
+ inference_method: the inference method such as predict, score...
327
+
328
+ Raises:
329
+ SnowflakeMLException: If the estimator is not fitted, raise error
330
+ SnowflakeMLException: If the session is None, raise error
331
+
332
+ Returns:
333
+ A list of available package that exists in the snowflake anaconda channel
328
334
  """
329
335
  if not self._is_fitted:
330
336
  raise exceptions.SnowflakeMLException(
@@ -396,7 +402,7 @@ class KernelDensity(BaseTransformer):
396
402
  transform_kwargs = dict(
397
403
  session = dataset._session,
398
404
  dependencies = self._deps,
399
- pass_through_cols = self._get_pass_through_columns(dataset),
405
+ drop_input_cols = self._drop_input_cols,
400
406
  expected_output_cols_type = expected_type_inferred,
401
407
  )
402
408
 
@@ -456,16 +462,16 @@ class KernelDensity(BaseTransformer):
456
462
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
457
463
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
458
464
  # each row containing a list of values.
459
- expected_dtype = "ARRAY"
465
+ expected_dtype = "array"
460
466
 
461
467
  # If we were unable to assign a type to this transform in the factory, infer the type here.
462
468
  if expected_dtype == "":
463
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
469
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
464
470
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
465
- expected_dtype = "ARRAY"
466
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
471
+ expected_dtype = "array"
472
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
467
473
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
468
- expected_dtype = "ARRAY"
474
+ expected_dtype = "array"
469
475
  else:
470
476
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
471
477
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -483,7 +489,7 @@ class KernelDensity(BaseTransformer):
483
489
  transform_kwargs = dict(
484
490
  session = dataset._session,
485
491
  dependencies = self._deps,
486
- pass_through_cols = self._get_pass_through_columns(dataset),
492
+ drop_input_cols = self._drop_input_cols,
487
493
  expected_output_cols_type = expected_dtype,
488
494
  )
489
495
 
@@ -534,7 +540,7 @@ class KernelDensity(BaseTransformer):
534
540
  subproject=_SUBPROJECT,
535
541
  )
536
542
  output_result, fitted_estimator = model_trainer.train_fit_predict(
537
- pass_through_columns=self._get_pass_through_columns(dataset),
543
+ drop_input_cols=self._drop_input_cols,
538
544
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
539
545
  )
540
546
  self._sklearn_object = fitted_estimator
@@ -552,44 +558,6 @@ class KernelDensity(BaseTransformer):
552
558
  assert self._sklearn_object is not None
553
559
  return self._sklearn_object.embedding_
554
560
 
555
-
556
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
557
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
558
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
559
- """
560
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
561
- if output_cols:
562
- output_cols = [
563
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
564
- for c in output_cols
565
- ]
566
- elif getattr(self._sklearn_object, "classes_", None) is None:
567
- output_cols = [output_cols_prefix]
568
- elif self._sklearn_object is not None:
569
- classes = self._sklearn_object.classes_
570
- if isinstance(classes, numpy.ndarray):
571
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
572
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
573
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
574
- output_cols = []
575
- for i, cl in enumerate(classes):
576
- # For binary classification, there is only one output column for each class
577
- # ndarray as the two classes are complementary.
578
- if len(cl) == 2:
579
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
580
- else:
581
- output_cols.extend([
582
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
583
- ])
584
- else:
585
- output_cols = []
586
-
587
- # Make sure column names are valid snowflake identifiers.
588
- assert output_cols is not None # Make MyPy happy
589
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
590
-
591
- return rv
592
-
593
561
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
594
562
  @telemetry.send_api_usage_telemetry(
595
563
  project=_PROJECT,
@@ -629,7 +597,7 @@ class KernelDensity(BaseTransformer):
629
597
  transform_kwargs = dict(
630
598
  session=dataset._session,
631
599
  dependencies=self._deps,
632
- pass_through_cols=self._get_pass_through_columns(dataset),
600
+ drop_input_cols = self._drop_input_cols,
633
601
  expected_output_cols_type="float",
634
602
  )
635
603
 
@@ -694,7 +662,7 @@ class KernelDensity(BaseTransformer):
694
662
  transform_kwargs = dict(
695
663
  session=dataset._session,
696
664
  dependencies=self._deps,
697
- pass_through_cols=self._get_pass_through_columns(dataset),
665
+ drop_input_cols = self._drop_input_cols,
698
666
  expected_output_cols_type="float",
699
667
  )
700
668
  elif isinstance(dataset, pd.DataFrame):
@@ -755,7 +723,7 @@ class KernelDensity(BaseTransformer):
755
723
  transform_kwargs = dict(
756
724
  session=dataset._session,
757
725
  dependencies=self._deps,
758
- pass_through_cols=self._get_pass_through_columns(dataset),
726
+ drop_input_cols = self._drop_input_cols,
759
727
  expected_output_cols_type="float",
760
728
  )
761
729
 
@@ -822,7 +790,7 @@ class KernelDensity(BaseTransformer):
822
790
  transform_kwargs = dict(
823
791
  session=dataset._session,
824
792
  dependencies=self._deps,
825
- pass_through_cols=self._get_pass_through_columns(dataset),
793
+ drop_input_cols = self._drop_input_cols,
826
794
  expected_output_cols_type="float",
827
795
  )
828
796
 
@@ -878,13 +846,17 @@ class KernelDensity(BaseTransformer):
878
846
  transform_kwargs: ScoreKwargsTypedDict = dict()
879
847
 
880
848
  if isinstance(dataset, DataFrame):
849
+ self._deps = self._batch_inference_validate_snowpark(
850
+ dataset=dataset,
851
+ inference_method="score",
852
+ )
881
853
  selected_cols = self._get_active_columns()
882
854
  if len(selected_cols) > 0:
883
855
  dataset = dataset.select(selected_cols)
884
856
  assert isinstance(dataset._session, Session) # keep mypy happy
885
857
  transform_kwargs = dict(
886
858
  session=dataset._session,
887
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
859
+ dependencies=["snowflake-snowpark-python"] + self._deps,
888
860
  score_sproc_imports=['sklearn'],
889
861
  )
890
862
  elif isinstance(dataset, pd.DataFrame):
@@ -958,9 +930,9 @@ class KernelDensity(BaseTransformer):
958
930
  transform_kwargs = dict(
959
931
  session = dataset._session,
960
932
  dependencies = self._deps,
961
- pass_through_cols = self._get_pass_through_columns(dataset),
962
- expected_output_cols_type = "array",
963
- n_neighbors = n_neighbors,
933
+ drop_input_cols = self._drop_input_cols,
934
+ expected_output_cols_type="array",
935
+ n_neighbors = n_neighbors,
964
936
  return_distance = return_distance
965
937
  )
966
938
  elif isinstance(dataset, pd.DataFrame):