snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -345,18 +345,24 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
345
345
  self._get_model_signatures(dataset)
346
346
  return self
347
347
 
348
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
349
- if self._drop_input_cols:
350
- return []
351
- else:
352
- return list(set(dataset.columns) - set(self.output_cols))
353
-
354
348
  def _batch_inference_validate_snowpark(
355
349
  self,
356
350
  dataset: DataFrame,
357
351
  inference_method: str,
358
352
  ) -> List[str]:
359
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
353
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
354
+ return the available package that exists in the snowflake anaconda channel
355
+
356
+ Args:
357
+ dataset: snowpark dataframe
358
+ inference_method: the inference method such as predict, score...
359
+
360
+ Raises:
361
+ SnowflakeMLException: If the estimator is not fitted, raise error
362
+ SnowflakeMLException: If the session is None, raise error
363
+
364
+ Returns:
365
+ A list of available package that exists in the snowflake anaconda channel
360
366
  """
361
367
  if not self._is_fitted:
362
368
  raise exceptions.SnowflakeMLException(
@@ -428,7 +434,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
428
434
  transform_kwargs = dict(
429
435
  session = dataset._session,
430
436
  dependencies = self._deps,
431
- pass_through_cols = self._get_pass_through_columns(dataset),
437
+ drop_input_cols = self._drop_input_cols,
432
438
  expected_output_cols_type = expected_type_inferred,
433
439
  )
434
440
 
@@ -490,16 +496,16 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
490
496
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
491
497
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
492
498
  # each row containing a list of values.
493
- expected_dtype = "ARRAY"
499
+ expected_dtype = "array"
494
500
 
495
501
  # If we were unable to assign a type to this transform in the factory, infer the type here.
496
502
  if expected_dtype == "":
497
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
503
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
498
504
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
499
- expected_dtype = "ARRAY"
500
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
505
+ expected_dtype = "array"
506
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
501
507
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
502
- expected_dtype = "ARRAY"
508
+ expected_dtype = "array"
503
509
  else:
504
510
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
505
511
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -517,7 +523,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
517
523
  transform_kwargs = dict(
518
524
  session = dataset._session,
519
525
  dependencies = self._deps,
520
- pass_through_cols = self._get_pass_through_columns(dataset),
526
+ drop_input_cols = self._drop_input_cols,
521
527
  expected_output_cols_type = expected_dtype,
522
528
  )
523
529
 
@@ -568,7 +574,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
568
574
  subproject=_SUBPROJECT,
569
575
  )
570
576
  output_result, fitted_estimator = model_trainer.train_fit_predict(
571
- pass_through_columns=self._get_pass_through_columns(dataset),
577
+ drop_input_cols=self._drop_input_cols,
572
578
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
573
579
  )
574
580
  self._sklearn_object = fitted_estimator
@@ -586,44 +592,6 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
586
592
  assert self._sklearn_object is not None
587
593
  return self._sklearn_object.embedding_
588
594
 
589
-
590
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
591
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
592
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
593
- """
594
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
595
- if output_cols:
596
- output_cols = [
597
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
598
- for c in output_cols
599
- ]
600
- elif getattr(self._sklearn_object, "classes_", None) is None:
601
- output_cols = [output_cols_prefix]
602
- elif self._sklearn_object is not None:
603
- classes = self._sklearn_object.classes_
604
- if isinstance(classes, numpy.ndarray):
605
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
606
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
607
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
608
- output_cols = []
609
- for i, cl in enumerate(classes):
610
- # For binary classification, there is only one output column for each class
611
- # ndarray as the two classes are complementary.
612
- if len(cl) == 2:
613
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
614
- else:
615
- output_cols.extend([
616
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
617
- ])
618
- else:
619
- output_cols = []
620
-
621
- # Make sure column names are valid snowflake identifiers.
622
- assert output_cols is not None # Make MyPy happy
623
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
624
-
625
- return rv
626
-
627
595
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
628
596
  @telemetry.send_api_usage_telemetry(
629
597
  project=_PROJECT,
@@ -663,7 +631,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
663
631
  transform_kwargs = dict(
664
632
  session=dataset._session,
665
633
  dependencies=self._deps,
666
- pass_through_cols=self._get_pass_through_columns(dataset),
634
+ drop_input_cols = self._drop_input_cols,
667
635
  expected_output_cols_type="float",
668
636
  )
669
637
 
@@ -728,7 +696,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
728
696
  transform_kwargs = dict(
729
697
  session=dataset._session,
730
698
  dependencies=self._deps,
731
- pass_through_cols=self._get_pass_through_columns(dataset),
699
+ drop_input_cols = self._drop_input_cols,
732
700
  expected_output_cols_type="float",
733
701
  )
734
702
  elif isinstance(dataset, pd.DataFrame):
@@ -789,7 +757,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
789
757
  transform_kwargs = dict(
790
758
  session=dataset._session,
791
759
  dependencies=self._deps,
792
- pass_through_cols=self._get_pass_through_columns(dataset),
760
+ drop_input_cols = self._drop_input_cols,
793
761
  expected_output_cols_type="float",
794
762
  )
795
763
 
@@ -854,7 +822,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
854
822
  transform_kwargs = dict(
855
823
  session=dataset._session,
856
824
  dependencies=self._deps,
857
- pass_through_cols=self._get_pass_through_columns(dataset),
825
+ drop_input_cols = self._drop_input_cols,
858
826
  expected_output_cols_type="float",
859
827
  )
860
828
 
@@ -908,13 +876,17 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
908
876
  transform_kwargs: ScoreKwargsTypedDict = dict()
909
877
 
910
878
  if isinstance(dataset, DataFrame):
879
+ self._deps = self._batch_inference_validate_snowpark(
880
+ dataset=dataset,
881
+ inference_method="score",
882
+ )
911
883
  selected_cols = self._get_active_columns()
912
884
  if len(selected_cols) > 0:
913
885
  dataset = dataset.select(selected_cols)
914
886
  assert isinstance(dataset._session, Session) # keep mypy happy
915
887
  transform_kwargs = dict(
916
888
  session=dataset._session,
917
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
889
+ dependencies=["snowflake-snowpark-python"] + self._deps,
918
890
  score_sproc_imports=['sklearn'],
919
891
  )
920
892
  elif isinstance(dataset, pd.DataFrame):
@@ -988,9 +960,9 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
988
960
  transform_kwargs = dict(
989
961
  session = dataset._session,
990
962
  dependencies = self._deps,
991
- pass_through_cols = self._get_pass_through_columns(dataset),
992
- expected_output_cols_type = "array",
993
- n_neighbors = n_neighbors,
963
+ drop_input_cols = self._drop_input_cols,
964
+ expected_output_cols_type="array",
965
+ n_neighbors = n_neighbors,
994
966
  return_distance = return_distance
995
967
  )
996
968
  elif isinstance(dataset, pd.DataFrame):
@@ -346,18 +346,24 @@ class RadiusNeighborsClassifier(BaseTransformer):
346
346
  self._get_model_signatures(dataset)
347
347
  return self
348
348
 
349
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
350
- if self._drop_input_cols:
351
- return []
352
- else:
353
- return list(set(dataset.columns) - set(self.output_cols))
354
-
355
349
  def _batch_inference_validate_snowpark(
356
350
  self,
357
351
  dataset: DataFrame,
358
352
  inference_method: str,
359
353
  ) -> List[str]:
360
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
354
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
355
+ return the available package that exists in the snowflake anaconda channel
356
+
357
+ Args:
358
+ dataset: snowpark dataframe
359
+ inference_method: the inference method such as predict, score...
360
+
361
+ Raises:
362
+ SnowflakeMLException: If the estimator is not fitted, raise error
363
+ SnowflakeMLException: If the session is None, raise error
364
+
365
+ Returns:
366
+ A list of available package that exists in the snowflake anaconda channel
361
367
  """
362
368
  if not self._is_fitted:
363
369
  raise exceptions.SnowflakeMLException(
@@ -431,7 +437,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
431
437
  transform_kwargs = dict(
432
438
  session = dataset._session,
433
439
  dependencies = self._deps,
434
- pass_through_cols = self._get_pass_through_columns(dataset),
440
+ drop_input_cols = self._drop_input_cols,
435
441
  expected_output_cols_type = expected_type_inferred,
436
442
  )
437
443
 
@@ -491,16 +497,16 @@ class RadiusNeighborsClassifier(BaseTransformer):
491
497
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
492
498
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
493
499
  # each row containing a list of values.
494
- expected_dtype = "ARRAY"
500
+ expected_dtype = "array"
495
501
 
496
502
  # If we were unable to assign a type to this transform in the factory, infer the type here.
497
503
  if expected_dtype == "":
498
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
504
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
499
505
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
500
- expected_dtype = "ARRAY"
501
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
506
+ expected_dtype = "array"
507
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
502
508
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
503
- expected_dtype = "ARRAY"
509
+ expected_dtype = "array"
504
510
  else:
505
511
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
506
512
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -518,7 +524,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
518
524
  transform_kwargs = dict(
519
525
  session = dataset._session,
520
526
  dependencies = self._deps,
521
- pass_through_cols = self._get_pass_through_columns(dataset),
527
+ drop_input_cols = self._drop_input_cols,
522
528
  expected_output_cols_type = expected_dtype,
523
529
  )
524
530
 
@@ -569,7 +575,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
569
575
  subproject=_SUBPROJECT,
570
576
  )
571
577
  output_result, fitted_estimator = model_trainer.train_fit_predict(
572
- pass_through_columns=self._get_pass_through_columns(dataset),
578
+ drop_input_cols=self._drop_input_cols,
573
579
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
574
580
  )
575
581
  self._sklearn_object = fitted_estimator
@@ -587,44 +593,6 @@ class RadiusNeighborsClassifier(BaseTransformer):
587
593
  assert self._sklearn_object is not None
588
594
  return self._sklearn_object.embedding_
589
595
 
590
-
591
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
592
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
593
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
594
- """
595
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
596
- if output_cols:
597
- output_cols = [
598
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
599
- for c in output_cols
600
- ]
601
- elif getattr(self._sklearn_object, "classes_", None) is None:
602
- output_cols = [output_cols_prefix]
603
- elif self._sklearn_object is not None:
604
- classes = self._sklearn_object.classes_
605
- if isinstance(classes, numpy.ndarray):
606
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
607
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
608
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
609
- output_cols = []
610
- for i, cl in enumerate(classes):
611
- # For binary classification, there is only one output column for each class
612
- # ndarray as the two classes are complementary.
613
- if len(cl) == 2:
614
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
615
- else:
616
- output_cols.extend([
617
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
618
- ])
619
- else:
620
- output_cols = []
621
-
622
- # Make sure column names are valid snowflake identifiers.
623
- assert output_cols is not None # Make MyPy happy
624
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
625
-
626
- return rv
627
-
628
596
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
629
597
  @telemetry.send_api_usage_telemetry(
630
598
  project=_PROJECT,
@@ -666,7 +634,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
666
634
  transform_kwargs = dict(
667
635
  session=dataset._session,
668
636
  dependencies=self._deps,
669
- pass_through_cols=self._get_pass_through_columns(dataset),
637
+ drop_input_cols = self._drop_input_cols,
670
638
  expected_output_cols_type="float",
671
639
  )
672
640
 
@@ -733,7 +701,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
733
701
  transform_kwargs = dict(
734
702
  session=dataset._session,
735
703
  dependencies=self._deps,
736
- pass_through_cols=self._get_pass_through_columns(dataset),
704
+ drop_input_cols = self._drop_input_cols,
737
705
  expected_output_cols_type="float",
738
706
  )
739
707
  elif isinstance(dataset, pd.DataFrame):
@@ -794,7 +762,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
794
762
  transform_kwargs = dict(
795
763
  session=dataset._session,
796
764
  dependencies=self._deps,
797
- pass_through_cols=self._get_pass_through_columns(dataset),
765
+ drop_input_cols = self._drop_input_cols,
798
766
  expected_output_cols_type="float",
799
767
  )
800
768
 
@@ -859,7 +827,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
859
827
  transform_kwargs = dict(
860
828
  session=dataset._session,
861
829
  dependencies=self._deps,
862
- pass_through_cols=self._get_pass_through_columns(dataset),
830
+ drop_input_cols = self._drop_input_cols,
863
831
  expected_output_cols_type="float",
864
832
  )
865
833
 
@@ -915,13 +883,17 @@ class RadiusNeighborsClassifier(BaseTransformer):
915
883
  transform_kwargs: ScoreKwargsTypedDict = dict()
916
884
 
917
885
  if isinstance(dataset, DataFrame):
886
+ self._deps = self._batch_inference_validate_snowpark(
887
+ dataset=dataset,
888
+ inference_method="score",
889
+ )
918
890
  selected_cols = self._get_active_columns()
919
891
  if len(selected_cols) > 0:
920
892
  dataset = dataset.select(selected_cols)
921
893
  assert isinstance(dataset._session, Session) # keep mypy happy
922
894
  transform_kwargs = dict(
923
895
  session=dataset._session,
924
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
896
+ dependencies=["snowflake-snowpark-python"] + self._deps,
925
897
  score_sproc_imports=['sklearn'],
926
898
  )
927
899
  elif isinstance(dataset, pd.DataFrame):
@@ -995,9 +967,9 @@ class RadiusNeighborsClassifier(BaseTransformer):
995
967
  transform_kwargs = dict(
996
968
  session = dataset._session,
997
969
  dependencies = self._deps,
998
- pass_through_cols = self._get_pass_through_columns(dataset),
999
- expected_output_cols_type = "array",
1000
- n_neighbors = n_neighbors,
970
+ drop_input_cols = self._drop_input_cols,
971
+ expected_output_cols_type="array",
972
+ n_neighbors = n_neighbors,
1001
973
  return_distance = return_distance
1002
974
  )
1003
975
  elif isinstance(dataset, pd.DataFrame):
@@ -336,18 +336,24 @@ class RadiusNeighborsRegressor(BaseTransformer):
336
336
  self._get_model_signatures(dataset)
337
337
  return self
338
338
 
339
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
340
- if self._drop_input_cols:
341
- return []
342
- else:
343
- return list(set(dataset.columns) - set(self.output_cols))
344
-
345
339
  def _batch_inference_validate_snowpark(
346
340
  self,
347
341
  dataset: DataFrame,
348
342
  inference_method: str,
349
343
  ) -> List[str]:
350
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
344
+ """Util method to validate that batch inference can be run on a snowpark dataframe and
345
+ return the available packages that exist in the snowflake anaconda channel
346
+
347
+ Args:
348
+ dataset: snowpark dataframe
349
+ inference_method: the inference method such as predict, score...
350
+
351
+ Raises:
352
+ SnowflakeMLException: If the estimator is not fitted, raise error
353
+ SnowflakeMLException: If the session is None, raise error
354
+
355
+ Returns:
356
+ A list of available packages that exist in the snowflake anaconda channel
351
357
  """
352
358
  if not self._is_fitted:
353
359
  raise exceptions.SnowflakeMLException(
@@ -421,7 +427,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
421
427
  transform_kwargs = dict(
422
428
  session = dataset._session,
423
429
  dependencies = self._deps,
424
- pass_through_cols = self._get_pass_through_columns(dataset),
430
+ drop_input_cols = self._drop_input_cols,
425
431
  expected_output_cols_type = expected_type_inferred,
426
432
  )
427
433
 
@@ -481,16 +487,16 @@ class RadiusNeighborsRegressor(BaseTransformer):
481
487
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
482
488
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
483
489
  # each row containing a list of values.
484
- expected_dtype = "ARRAY"
490
+ expected_dtype = "array"
485
491
 
486
492
  # If we were unable to assign a type to this transform in the factory, infer the type here.
487
493
  if expected_dtype == "":
488
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
494
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
489
495
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
490
- expected_dtype = "ARRAY"
491
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
496
+ expected_dtype = "array"
497
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
492
498
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
493
- expected_dtype = "ARRAY"
499
+ expected_dtype = "array"
494
500
  else:
495
501
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
496
502
  # We can only infer the output types from the input types if the following two statements are true:
@@ -508,7 +514,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
508
514
  transform_kwargs = dict(
509
515
  session = dataset._session,
510
516
  dependencies = self._deps,
511
- pass_through_cols = self._get_pass_through_columns(dataset),
517
+ drop_input_cols = self._drop_input_cols,
512
518
  expected_output_cols_type = expected_dtype,
513
519
  )
514
520
 
@@ -559,7 +565,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
559
565
  subproject=_SUBPROJECT,
560
566
  )
561
567
  output_result, fitted_estimator = model_trainer.train_fit_predict(
562
- pass_through_columns=self._get_pass_through_columns(dataset),
568
+ drop_input_cols=self._drop_input_cols,
563
569
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
564
570
  )
565
571
  self._sklearn_object = fitted_estimator
@@ -577,44 +583,6 @@ class RadiusNeighborsRegressor(BaseTransformer):
577
583
  assert self._sklearn_object is not None
578
584
  return self._sklearn_object.embedding_
579
585
 
580
-
581
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
582
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
583
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
584
- """
585
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
586
- if output_cols:
587
- output_cols = [
588
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
589
- for c in output_cols
590
- ]
591
- elif getattr(self._sklearn_object, "classes_", None) is None:
592
- output_cols = [output_cols_prefix]
593
- elif self._sklearn_object is not None:
594
- classes = self._sklearn_object.classes_
595
- if isinstance(classes, numpy.ndarray):
596
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
597
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
598
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
599
- output_cols = []
600
- for i, cl in enumerate(classes):
601
- # For binary classification, there is only one output column for each class
602
- # ndarray as the two classes are complementary.
603
- if len(cl) == 2:
604
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
605
- else:
606
- output_cols.extend([
607
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
608
- ])
609
- else:
610
- output_cols = []
611
-
612
- # Make sure column names are valid snowflake identifiers.
613
- assert output_cols is not None # Make MyPy happy
614
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
615
-
616
- return rv
617
-
618
586
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
619
587
  @telemetry.send_api_usage_telemetry(
620
588
  project=_PROJECT,
@@ -654,7 +622,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
654
622
  transform_kwargs = dict(
655
623
  session=dataset._session,
656
624
  dependencies=self._deps,
657
- pass_through_cols=self._get_pass_through_columns(dataset),
625
+ drop_input_cols = self._drop_input_cols,
658
626
  expected_output_cols_type="float",
659
627
  )
660
628
 
@@ -719,7 +687,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
719
687
  transform_kwargs = dict(
720
688
  session=dataset._session,
721
689
  dependencies=self._deps,
722
- pass_through_cols=self._get_pass_through_columns(dataset),
690
+ drop_input_cols = self._drop_input_cols,
723
691
  expected_output_cols_type="float",
724
692
  )
725
693
  elif isinstance(dataset, pd.DataFrame):
@@ -780,7 +748,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
780
748
  transform_kwargs = dict(
781
749
  session=dataset._session,
782
750
  dependencies=self._deps,
783
- pass_through_cols=self._get_pass_through_columns(dataset),
751
+ drop_input_cols = self._drop_input_cols,
784
752
  expected_output_cols_type="float",
785
753
  )
786
754
 
@@ -845,7 +813,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
845
813
  transform_kwargs = dict(
846
814
  session=dataset._session,
847
815
  dependencies=self._deps,
848
- pass_through_cols=self._get_pass_through_columns(dataset),
816
+ drop_input_cols = self._drop_input_cols,
849
817
  expected_output_cols_type="float",
850
818
  )
851
819
 
@@ -901,13 +869,17 @@ class RadiusNeighborsRegressor(BaseTransformer):
901
869
  transform_kwargs: ScoreKwargsTypedDict = dict()
902
870
 
903
871
  if isinstance(dataset, DataFrame):
872
+ self._deps = self._batch_inference_validate_snowpark(
873
+ dataset=dataset,
874
+ inference_method="score",
875
+ )
904
876
  selected_cols = self._get_active_columns()
905
877
  if len(selected_cols) > 0:
906
878
  dataset = dataset.select(selected_cols)
907
879
  assert isinstance(dataset._session, Session) # keep mypy happy
908
880
  transform_kwargs = dict(
909
881
  session=dataset._session,
910
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
882
+ dependencies=["snowflake-snowpark-python"] + self._deps,
911
883
  score_sproc_imports=['sklearn'],
912
884
  )
913
885
  elif isinstance(dataset, pd.DataFrame):
@@ -981,9 +953,9 @@ class RadiusNeighborsRegressor(BaseTransformer):
981
953
  transform_kwargs = dict(
982
954
  session = dataset._session,
983
955
  dependencies = self._deps,
984
- pass_through_cols = self._get_pass_through_columns(dataset),
985
- expected_output_cols_type = "array",
986
- n_neighbors = n_neighbors,
956
+ drop_input_cols = self._drop_input_cols,
957
+ expected_output_cols_type="array",
958
+ n_neighbors = n_neighbors,
987
959
  return_distance = return_distance
988
960
  )
989
961
  elif isinstance(dataset, pd.DataFrame):