snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
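New in this release is the `human_readable_id` package (items 2-5): word lists of 128 adjectives and 128 animals plus a generator. The diff below does not show `hrid_generator.py` itself, so the following is only a minimal sketch of how such an adjective-animal ID generator typically works; the word lists, separator, and numeric suffix are all assumptions, not the package's actual format.

```python
import secrets

# Hypothetical word lists; the real files ship 128 adjectives and 128 animals,
# so each word would encode 7 bits of entropy (this pairing scheme is assumed).
ADJECTIVES = ["amber", "brave", "calm", "dapper"]  # ... 128 entries in the real file
ANIMALS = ["ant", "bat", "cat", "dog"]             # ... 128 entries in the real file

def generate_hrid() -> str:
    """Return a human-readable ID such as 'brave_cat_42' (format assumed)."""
    adjective = secrets.choice(ADJECTIVES)
    animal = secrets.choice(ANIMALS)
    suffix = secrets.randbelow(100)  # small numeric disambiguator (assumed)
    return f"{adjective}_{animal}_{suffix}"

print(generate_hrid())
```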
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py

@@ -343,18 +343,24 @@ class GaussianProcessRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -428,7 +434,7 @@ class GaussianProcessRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -488,16 +494,16 @@ class GaussianProcessRegressor(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -515,7 +521,7 @@ class GaussianProcessRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -566,7 +572,7 @@ class GaussianProcessRegressor(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -584,44 +590,6 @@ class GaussianProcessRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -661,7 +629,7 @@ class GaussianProcessRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -726,7 +694,7 @@ class GaussianProcessRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
            )
        elif isinstance(dataset, pd.DataFrame):
@@ -787,7 +755,7 @@ class GaussianProcessRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -852,7 +820,7 @@ class GaussianProcessRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -908,13 +876,17 @@ class GaussianProcessRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -988,9 +960,9 @@ class GaussianProcessRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
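The dominant pattern in this release, repeated below for IterativeImputer and KNNImputer, is the removal of the per-estimator `_get_pass_through_columns` helper: call sites now forward the `self._drop_input_cols` boolean and leave the column arithmetic to the shared batch-inference handlers. A minimal sketch of the equivalence; the standalone function below is illustrative, not the handlers' actual API.

```python
from typing import List

# What the deleted helper computed: with drop_input_cols set, nothing is
# passed through; otherwise every dataset column that is not an output
# column is carried into the result.
def pass_through_columns(dataset_columns: List[str],
                         output_cols: List[str],
                         drop_input_cols: bool) -> List[str]:
    if drop_input_cols:
        return []
    return list(set(dataset_columns) - set(output_cols))

# After the change, estimators send only the flag:
#     transform_kwargs = dict(..., drop_input_cols=self._drop_input_cols, ...)
# and the handler derives the columns itself (presumably with logic like the
# function above, next to the code that builds the inference UDF).
```

Centralizing this removes duplicated logic from each of the roughly 200 generated estimator files, consistent with the uniform +33 -61 deltas in the file list.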
snowflake/ml/modeling/impute/iterative_imputer.py

@@ -385,18 +385,24 @@ class IterativeImputer(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -468,7 +474,7 @@ class IterativeImputer(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -530,16 +536,16 @@ class IterativeImputer(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -557,7 +563,7 @@ class IterativeImputer(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -608,7 +614,7 @@ class IterativeImputer(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -626,44 +632,6 @@ class IterativeImputer(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -703,7 +671,7 @@ class IterativeImputer(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -768,7 +736,7 @@ class IterativeImputer(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -829,7 +797,7 @@ class IterativeImputer(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -894,7 +862,7 @@ class IterativeImputer(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -948,13 +916,17 @@ class IterativeImputer(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1028,9 +1000,9 @@ class IterativeImputer(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
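The other recurring change, visible in the `score` hunks above: scoring a Snowpark DataFrame now validates dependencies up front via `_batch_inference_validate_snowpark` and caches the result in `self._deps`, instead of handing raw `self._get_dependencies()` to the scoring sproc. A minimal sketch of the contract the new docstring describes, using simplified stand-ins for the real exception type; the channel-lookup step is an assumption.

```python
from typing import Any, List

class SnowflakeMLExceptionStub(Exception):
    """Simplified stand-in for snowflake.ml's exceptions.SnowflakeMLException."""

def validate_batch_inference(estimator: Any, dataset: Any, inference_method: str) -> List[str]:
    # Documented contract: raise if the estimator is not fitted, raise if the
    # Snowpark DataFrame has no session, else return the package list that
    # resolves in the Snowflake Anaconda channel.
    if not estimator._is_fitted:
        raise SnowflakeMLExceptionStub(
            f"Estimator must be fitted before calling {inference_method}()."
        )
    if dataset._session is None:
        raise SnowflakeMLExceptionStub("Snowpark DataFrame has no active session.")
    # The real method checks each dependency against the Anaconda channel;
    # returning the declared deps unchanged here is a simplification.
    return list(estimator._deps)
```

Caching the validated list means the sproc's `dependencies=["snowflake-snowpark-python"] + self._deps` pins packages that are known to resolve server-side, rather than whatever the estimator happened to declare.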
snowflake/ml/modeling/impute/knn_imputer.py

@@ -311,18 +311,24 @@ class KNNImputer(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -394,7 +400,7 @@ class KNNImputer(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -456,16 +462,16 @@ class KNNImputer(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "ARRAY"
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -483,7 +489,7 @@ class KNNImputer(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -534,7 +540,7 @@ class KNNImputer(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -552,44 +558,6 @@ class KNNImputer(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -629,7 +597,7 @@ class KNNImputer(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -694,7 +662,7 @@ class KNNImputer(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -755,7 +723,7 @@ class KNNImputer(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -820,7 +788,7 @@ class KNNImputer(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -874,13 +842,17 @@ class KNNImputer(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -954,9 +926,9 @@ class KNNImputer(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
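Finally, each estimator also loses its private copy of `_get_output_column_names`. Given that `snowflake/ml/modeling/framework/base.py` grows by 55 lines in this release, the logic has presumably been consolidated there, though this section of the diff does not show it. A condensed restatement of the removed helper's naming rule follows; identifier sanitization via `identifier.rename_to_valid_snowflake_identifier` is omitted for brevity.

```python
from typing import List
import numpy as np

def output_column_names(prefix: str, classes) -> List[str]:
    """Condensed restatement of the removed helper's naming rule.

    - Non-classifiers (no classes_ attribute) get a single column named `prefix`.
    - A plain ndarray of classes yields one column per class.
    - A list of ndarrays (multioutput classifier) yields per-output columns,
      with a single column per binary output since its two classes are
      complementary.
    """
    if classes is None:
        return [prefix]
    if isinstance(classes, np.ndarray):
        return [f"{prefix}{c}" for c in classes.tolist()]
    cols: List[str] = []
    for i, cl in enumerate(classes):
        if len(cl) == 2:
            cols.append(f"{prefix}{i}_{cl[0]}")
        else:
            cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
    return cols

# e.g. output_column_names("PREDICT_PROBA_", np.array([0, 1]))
# -> ['PREDICT_PROBA_0', 'PREDICT_PROBA_1']
```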