snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/ensemble/ada_boost_regressor.py
@@ -302,18 +302,24 @@ class AdaBoostRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -387,7 +393,7 @@ class AdaBoostRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -447,16 +453,16 @@ class AdaBoostRegressor(BaseTransformer):
                 # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
                 # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
                 # each row containing a list of values.
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -474,7 +480,7 @@ class AdaBoostRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -525,7 +531,7 @@ class AdaBoostRegressor(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -543,44 +549,6 @@ class AdaBoostRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -620,7 +588,7 @@ class AdaBoostRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -685,7 +653,7 @@ class AdaBoostRegressor(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
        elif isinstance(dataset, pd.DataFrame):
@@ -746,7 +714,7 @@ class AdaBoostRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -811,7 +779,7 @@ class AdaBoostRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -867,13 +835,17 @@ class AdaBoostRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session) # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -947,9 +919,9 @@ class AdaBoostRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
snowflake/ml/modeling/ensemble/bagging_classifier.py
@@ -337,18 +337,24 @@ class BaggingClassifier(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -422,7 +428,7 @@ class BaggingClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -482,16 +488,16 @@ class BaggingClassifier(BaseTransformer):
                 # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
                 # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
                 # each row containing a list of values.
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -509,7 +515,7 @@ class BaggingClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -560,7 +566,7 @@ class BaggingClassifier(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -578,44 +584,6 @@ class BaggingClassifier(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -657,7 +625,7 @@ class BaggingClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -724,7 +692,7 @@ class BaggingClassifier(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
        elif isinstance(dataset, pd.DataFrame):
@@ -787,7 +755,7 @@ class BaggingClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -852,7 +820,7 @@ class BaggingClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -908,13 +876,17 @@ class BaggingClassifier(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session) # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -988,9 +960,9 @@ class BaggingClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
snowflake/ml/modeling/ensemble/bagging_regressor.py
@@ -337,18 +337,24 @@ class BaggingRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe.
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -422,7 +428,7 @@ class BaggingRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -482,16 +488,16 @@ class BaggingRegressor(BaseTransformer):
                 # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
                 # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
                 # each row containing a list of values.
-                expected_dtype = "ARRAY"
+                expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "ARRAY"
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -509,7 +515,7 @@ class BaggingRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -560,7 +566,7 @@ class BaggingRegressor(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-            pass_through_columns=self._get_pass_through_columns(dataset),
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -578,44 +584,6 @@ class BaggingRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -655,7 +623,7 @@ class BaggingRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -720,7 +688,7 @@ class BaggingRegressor(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
        elif isinstance(dataset, pd.DataFrame):
@@ -781,7 +749,7 @@ class BaggingRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -846,7 +814,7 @@ class BaggingRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                pass_through_cols=self._get_pass_through_columns(dataset),
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -902,13 +870,17 @@ class BaggingRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session) # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -982,9 +954,9 @@ class BaggingRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-                pass_through_cols = self._get_pass_through_columns(dataset),
-                expected_output_cols_type = "array",
-                n_neighbors = n_neighbors,
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
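
The same mechanical refactor repeats across all three classes shown above and, per the file list, across the generated estimator wrappers (mostly +33 -61 each): the per-class _get_pass_through_columns helper is deleted, call sites now pass the boolean self._drop_input_cols instead, and _get_output_column_names is likewise removed from the generated code (presumably centralized, given the +55 -5 change to snowflake/ml/modeling/framework/base.py). A minimal sketch of the equivalent pass-through computation, reconstructed from the removed helper; the standalone function below is illustrative only, not part of the snowflake-ml-python API:

    from typing import List

    def derive_pass_through_columns(
        dataset_columns: List[str],
        output_cols: List[str],
        drop_input_cols: bool,
    ) -> List[str]:
        # Mirrors the removed _get_pass_through_columns helper: when input
        # columns are dropped, nothing is passed through; otherwise every
        # dataset column that is not an output column is carried along.
        if drop_input_cols:
            return []
        return list(set(dataset_columns) - set(output_cols))

Passing the flag rather than a precomputed column list means the handler can derive the pass-through set at execution time, against the columns the dataset actually has at that point.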