snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. snowflake/ml/_internal/file_utils.py +3 -3
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/telemetry.py +11 -2
  7. snowflake/ml/_internal/utils/formatting.py +1 -1
  8. snowflake/ml/feature_store/feature_store.py +15 -106
  9. snowflake/ml/fileset/sfcfs.py +4 -3
  10. snowflake/ml/fileset/stage_fs.py +18 -0
  11. snowflake/ml/model/_api.py +9 -9
  12. snowflake/ml/model/_client/model/model_version_impl.py +20 -15
  13. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
  14. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
  15. snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
  16. snowflake/ml/model/_model_composer/model_composer.py +10 -8
  17. snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
  18. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
  19. snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
  20. snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
  21. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  22. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
  23. snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
  24. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
  25. snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
  26. snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
  28. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
  29. snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
  30. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
  31. snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
  32. snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
  33. snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
  34. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  35. snowflake/ml/model/_packager/model_packager.py +8 -6
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +13 -0
  38. snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
  40. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
  41. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
  42. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  43. snowflake/ml/modeling/_internal/model_trainer.py +2 -2
  44. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
  45. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
  46. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
  47. snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
  51. snowflake/ml/modeling/cluster/birch.py +33 -61
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
  53. snowflake/ml/modeling/cluster/dbscan.py +33 -61
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
  55. snowflake/ml/modeling/cluster/k_means.py +33 -61
  56. snowflake/ml/modeling/cluster/mean_shift.py +33 -61
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
  58. snowflake/ml/modeling/cluster/optics.py +33 -61
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
  62. snowflake/ml/modeling/compose/column_transformer.py +33 -61
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
  69. snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
  70. snowflake/ml/modeling/covariance/oas.py +33 -61
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
  74. snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
  79. snowflake/ml/modeling/decomposition/pca.py +33 -61
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
  108. snowflake/ml/modeling/framework/base.py +55 -5
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
  111. snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
  112. snowflake/ml/modeling/impute/knn_imputer.py +33 -61
  113. snowflake/ml/modeling/impute/missing_indicator.py +33 -61
  114. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
  123. snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
  125. snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
  129. snowflake/ml/modeling/linear_model/lars.py +33 -61
  130. snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
  131. snowflake/ml/modeling/linear_model/lasso.py +33 -61
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
  136. snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
  146. snowflake/ml/modeling/linear_model/perceptron.py +33 -61
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
  149. snowflake/ml/modeling/linear_model/ridge.py +33 -61
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
  158. snowflake/ml/modeling/manifold/isomap.py +33 -61
  159. snowflake/ml/modeling/manifold/mds.py +33 -61
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
  161. snowflake/ml/modeling/manifold/tsne.py +33 -61
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
  164. snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
  165. snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
  166. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
  167. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
  168. snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
  169. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
  170. snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
  171. snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
  172. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
  173. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
  174. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
  175. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
  176. snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
  177. snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
  178. snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
  179. snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
  180. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
  181. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
  182. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
  183. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
  184. snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
  185. snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
  189. snowflake/ml/modeling/svm/linear_svc.py +33 -61
  190. snowflake/ml/modeling/svm/linear_svr.py +33 -61
  191. snowflake/ml/modeling/svm/nu_svc.py +33 -61
  192. snowflake/ml/modeling/svm/nu_svr.py +33 -61
  193. snowflake/ml/modeling/svm/svc.py +33 -61
  194. snowflake/ml/modeling/svm/svr.py +33 -61
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
  203. snowflake/ml/registry/_manager/model_manager.py +6 -2
  204. snowflake/ml/registry/model_registry.py +100 -27
  205. snowflake/ml/registry/registry.py +6 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
  208. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
  209. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
  210. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
  211. {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -316,18 +316,24 @@ class StackingRegressor(BaseTransformer):
316
316
  self._get_model_signatures(dataset)
317
317
  return self
318
318
 
319
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
320
- if self._drop_input_cols:
321
- return []
322
- else:
323
- return list(set(dataset.columns) - set(self.output_cols))
324
-
325
319
  def _batch_inference_validate_snowpark(
326
320
  self,
327
321
  dataset: DataFrame,
328
322
  inference_method: str,
329
323
  ) -> List[str]:
330
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
324
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
325
+ return the available package that exists in the snowflake anaconda channel
326
+
327
+ Args:
328
+ dataset: snowpark dataframe
329
+ inference_method: the inference method such as predict, score...
330
+
331
+ Raises:
332
+ SnowflakeMLException: If the estimator is not fitted, raise error
333
+ SnowflakeMLException: If the session is None, raise error
334
+
335
+ Returns:
336
+ A list of available package that exists in the snowflake anaconda channel
331
337
  """
332
338
  if not self._is_fitted:
333
339
  raise exceptions.SnowflakeMLException(
@@ -401,7 +407,7 @@ class StackingRegressor(BaseTransformer):
401
407
  transform_kwargs = dict(
402
408
  session = dataset._session,
403
409
  dependencies = self._deps,
404
- pass_through_cols = self._get_pass_through_columns(dataset),
410
+ drop_input_cols = self._drop_input_cols,
405
411
  expected_output_cols_type = expected_type_inferred,
406
412
  )
407
413
 
@@ -463,16 +469,16 @@ class StackingRegressor(BaseTransformer):
463
469
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
464
470
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
465
471
  # each row containing a list of values.
466
- expected_dtype = "ARRAY"
472
+ expected_dtype = "array"
467
473
 
468
474
  # If we were unable to assign a type to this transform in the factory, infer the type here.
469
475
  if expected_dtype == "":
470
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
476
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
471
477
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
472
- expected_dtype = "ARRAY"
473
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
478
+ expected_dtype = "array"
479
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
474
480
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
475
- expected_dtype = "ARRAY"
481
+ expected_dtype = "array"
476
482
  else:
477
483
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
478
484
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -490,7 +496,7 @@ class StackingRegressor(BaseTransformer):
490
496
  transform_kwargs = dict(
491
497
  session = dataset._session,
492
498
  dependencies = self._deps,
493
- pass_through_cols = self._get_pass_through_columns(dataset),
499
+ drop_input_cols = self._drop_input_cols,
494
500
  expected_output_cols_type = expected_dtype,
495
501
  )
496
502
 
@@ -541,7 +547,7 @@ class StackingRegressor(BaseTransformer):
541
547
  subproject=_SUBPROJECT,
542
548
  )
543
549
  output_result, fitted_estimator = model_trainer.train_fit_predict(
544
- pass_through_columns=self._get_pass_through_columns(dataset),
550
+ drop_input_cols=self._drop_input_cols,
545
551
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
546
552
  )
547
553
  self._sklearn_object = fitted_estimator
@@ -559,44 +565,6 @@ class StackingRegressor(BaseTransformer):
559
565
  assert self._sklearn_object is not None
560
566
  return self._sklearn_object.embedding_
561
567
 
562
-
563
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
564
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
565
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
566
- """
567
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
568
- if output_cols:
569
- output_cols = [
570
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
571
- for c in output_cols
572
- ]
573
- elif getattr(self._sklearn_object, "classes_", None) is None:
574
- output_cols = [output_cols_prefix]
575
- elif self._sklearn_object is not None:
576
- classes = self._sklearn_object.classes_
577
- if isinstance(classes, numpy.ndarray):
578
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
579
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
580
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
581
- output_cols = []
582
- for i, cl in enumerate(classes):
583
- # For binary classification, there is only one output column for each class
584
- # ndarray as the two classes are complementary.
585
- if len(cl) == 2:
586
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
587
- else:
588
- output_cols.extend([
589
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
590
- ])
591
- else:
592
- output_cols = []
593
-
594
- # Make sure column names are valid snowflake identifiers.
595
- assert output_cols is not None # Make MyPy happy
596
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
597
-
598
- return rv
599
-
600
568
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
601
569
  @telemetry.send_api_usage_telemetry(
602
570
  project=_PROJECT,
@@ -636,7 +604,7 @@ class StackingRegressor(BaseTransformer):
636
604
  transform_kwargs = dict(
637
605
  session=dataset._session,
638
606
  dependencies=self._deps,
639
- pass_through_cols=self._get_pass_through_columns(dataset),
607
+ drop_input_cols = self._drop_input_cols,
640
608
  expected_output_cols_type="float",
641
609
  )
642
610
 
@@ -701,7 +669,7 @@ class StackingRegressor(BaseTransformer):
701
669
  transform_kwargs = dict(
702
670
  session=dataset._session,
703
671
  dependencies=self._deps,
704
- pass_through_cols=self._get_pass_through_columns(dataset),
672
+ drop_input_cols = self._drop_input_cols,
705
673
  expected_output_cols_type="float",
706
674
  )
707
675
  elif isinstance(dataset, pd.DataFrame):
@@ -762,7 +730,7 @@ class StackingRegressor(BaseTransformer):
762
730
  transform_kwargs = dict(
763
731
  session=dataset._session,
764
732
  dependencies=self._deps,
765
- pass_through_cols=self._get_pass_through_columns(dataset),
733
+ drop_input_cols = self._drop_input_cols,
766
734
  expected_output_cols_type="float",
767
735
  )
768
736
 
@@ -827,7 +795,7 @@ class StackingRegressor(BaseTransformer):
827
795
  transform_kwargs = dict(
828
796
  session=dataset._session,
829
797
  dependencies=self._deps,
830
- pass_through_cols=self._get_pass_through_columns(dataset),
798
+ drop_input_cols = self._drop_input_cols,
831
799
  expected_output_cols_type="float",
832
800
  )
833
801
 
@@ -883,13 +851,17 @@ class StackingRegressor(BaseTransformer):
883
851
  transform_kwargs: ScoreKwargsTypedDict = dict()
884
852
 
885
853
  if isinstance(dataset, DataFrame):
854
+ self._deps = self._batch_inference_validate_snowpark(
855
+ dataset=dataset,
856
+ inference_method="score",
857
+ )
886
858
  selected_cols = self._get_active_columns()
887
859
  if len(selected_cols) > 0:
888
860
  dataset = dataset.select(selected_cols)
889
861
  assert isinstance(dataset._session, Session) # keep mypy happy
890
862
  transform_kwargs = dict(
891
863
  session=dataset._session,
892
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
864
+ dependencies=["snowflake-snowpark-python"] + self._deps,
893
865
  score_sproc_imports=['sklearn'],
894
866
  )
895
867
  elif isinstance(dataset, pd.DataFrame):
@@ -963,9 +935,9 @@ class StackingRegressor(BaseTransformer):
963
935
  transform_kwargs = dict(
964
936
  session = dataset._session,
965
937
  dependencies = self._deps,
966
- pass_through_cols = self._get_pass_through_columns(dataset),
967
- expected_output_cols_type = "array",
968
- n_neighbors = n_neighbors,
938
+ drop_input_cols = self._drop_input_cols,
939
+ expected_output_cols_type="array",
940
+ n_neighbors = n_neighbors,
969
941
  return_distance = return_distance
970
942
  )
971
943
  elif isinstance(dataset, pd.DataFrame):
@@ -298,18 +298,24 @@ class VotingClassifier(BaseTransformer):
298
298
  self._get_model_signatures(dataset)
299
299
  return self
300
300
 
301
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
302
- if self._drop_input_cols:
303
- return []
304
- else:
305
- return list(set(dataset.columns) - set(self.output_cols))
306
-
307
301
  def _batch_inference_validate_snowpark(
308
302
  self,
309
303
  dataset: DataFrame,
310
304
  inference_method: str,
311
305
  ) -> List[str]:
312
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
306
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
307
+ return the available package that exists in the snowflake anaconda channel
308
+
309
+ Args:
310
+ dataset: snowpark dataframe
311
+ inference_method: the inference method such as predict, score...
312
+
313
+ Raises:
314
+ SnowflakeMLException: If the estimator is not fitted, raise error
315
+ SnowflakeMLException: If the session is None, raise error
316
+
317
+ Returns:
318
+ A list of available package that exists in the snowflake anaconda channel
313
319
  """
314
320
  if not self._is_fitted:
315
321
  raise exceptions.SnowflakeMLException(
@@ -383,7 +389,7 @@ class VotingClassifier(BaseTransformer):
383
389
  transform_kwargs = dict(
384
390
  session = dataset._session,
385
391
  dependencies = self._deps,
386
- pass_through_cols = self._get_pass_through_columns(dataset),
392
+ drop_input_cols = self._drop_input_cols,
387
393
  expected_output_cols_type = expected_type_inferred,
388
394
  )
389
395
 
@@ -445,16 +451,16 @@ class VotingClassifier(BaseTransformer):
445
451
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
446
452
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
447
453
  # each row containing a list of values.
448
- expected_dtype = "ARRAY"
454
+ expected_dtype = "array"
449
455
 
450
456
  # If we were unable to assign a type to this transform in the factory, infer the type here.
451
457
  if expected_dtype == "":
452
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
458
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
453
459
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
454
- expected_dtype = "ARRAY"
455
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
460
+ expected_dtype = "array"
461
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
456
462
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
457
- expected_dtype = "ARRAY"
463
+ expected_dtype = "array"
458
464
  else:
459
465
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
460
466
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -472,7 +478,7 @@ class VotingClassifier(BaseTransformer):
472
478
  transform_kwargs = dict(
473
479
  session = dataset._session,
474
480
  dependencies = self._deps,
475
- pass_through_cols = self._get_pass_through_columns(dataset),
481
+ drop_input_cols = self._drop_input_cols,
476
482
  expected_output_cols_type = expected_dtype,
477
483
  )
478
484
 
@@ -523,7 +529,7 @@ class VotingClassifier(BaseTransformer):
523
529
  subproject=_SUBPROJECT,
524
530
  )
525
531
  output_result, fitted_estimator = model_trainer.train_fit_predict(
526
- pass_through_columns=self._get_pass_through_columns(dataset),
532
+ drop_input_cols=self._drop_input_cols,
527
533
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
528
534
  )
529
535
  self._sklearn_object = fitted_estimator
@@ -541,44 +547,6 @@ class VotingClassifier(BaseTransformer):
541
547
  assert self._sklearn_object is not None
542
548
  return self._sklearn_object.embedding_
543
549
 
544
-
545
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
546
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
547
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
548
- """
549
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
550
- if output_cols:
551
- output_cols = [
552
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
553
- for c in output_cols
554
- ]
555
- elif getattr(self._sklearn_object, "classes_", None) is None:
556
- output_cols = [output_cols_prefix]
557
- elif self._sklearn_object is not None:
558
- classes = self._sklearn_object.classes_
559
- if isinstance(classes, numpy.ndarray):
560
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
561
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
562
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
563
- output_cols = []
564
- for i, cl in enumerate(classes):
565
- # For binary classification, there is only one output column for each class
566
- # ndarray as the two classes are complementary.
567
- if len(cl) == 2:
568
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
569
- else:
570
- output_cols.extend([
571
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
572
- ])
573
- else:
574
- output_cols = []
575
-
576
- # Make sure column names are valid snowflake identifiers.
577
- assert output_cols is not None # Make MyPy happy
578
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
579
-
580
- return rv
581
-
582
550
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
583
551
  @telemetry.send_api_usage_telemetry(
584
552
  project=_PROJECT,
@@ -620,7 +588,7 @@ class VotingClassifier(BaseTransformer):
620
588
  transform_kwargs = dict(
621
589
  session=dataset._session,
622
590
  dependencies=self._deps,
623
- pass_through_cols=self._get_pass_through_columns(dataset),
591
+ drop_input_cols = self._drop_input_cols,
624
592
  expected_output_cols_type="float",
625
593
  )
626
594
 
@@ -687,7 +655,7 @@ class VotingClassifier(BaseTransformer):
687
655
  transform_kwargs = dict(
688
656
  session=dataset._session,
689
657
  dependencies=self._deps,
690
- pass_through_cols=self._get_pass_through_columns(dataset),
658
+ drop_input_cols = self._drop_input_cols,
691
659
  expected_output_cols_type="float",
692
660
  )
693
661
  elif isinstance(dataset, pd.DataFrame):
@@ -748,7 +716,7 @@ class VotingClassifier(BaseTransformer):
748
716
  transform_kwargs = dict(
749
717
  session=dataset._session,
750
718
  dependencies=self._deps,
751
- pass_through_cols=self._get_pass_through_columns(dataset),
719
+ drop_input_cols = self._drop_input_cols,
752
720
  expected_output_cols_type="float",
753
721
  )
754
722
 
@@ -813,7 +781,7 @@ class VotingClassifier(BaseTransformer):
813
781
  transform_kwargs = dict(
814
782
  session=dataset._session,
815
783
  dependencies=self._deps,
816
- pass_through_cols=self._get_pass_through_columns(dataset),
784
+ drop_input_cols = self._drop_input_cols,
817
785
  expected_output_cols_type="float",
818
786
  )
819
787
 
@@ -869,13 +837,17 @@ class VotingClassifier(BaseTransformer):
869
837
  transform_kwargs: ScoreKwargsTypedDict = dict()
870
838
 
871
839
  if isinstance(dataset, DataFrame):
840
+ self._deps = self._batch_inference_validate_snowpark(
841
+ dataset=dataset,
842
+ inference_method="score",
843
+ )
872
844
  selected_cols = self._get_active_columns()
873
845
  if len(selected_cols) > 0:
874
846
  dataset = dataset.select(selected_cols)
875
847
  assert isinstance(dataset._session, Session) # keep mypy happy
876
848
  transform_kwargs = dict(
877
849
  session=dataset._session,
878
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
850
+ dependencies=["snowflake-snowpark-python"] + self._deps,
879
851
  score_sproc_imports=['sklearn'],
880
852
  )
881
853
  elif isinstance(dataset, pd.DataFrame):
@@ -949,9 +921,9 @@ class VotingClassifier(BaseTransformer):
949
921
  transform_kwargs = dict(
950
922
  session = dataset._session,
951
923
  dependencies = self._deps,
952
- pass_through_cols = self._get_pass_through_columns(dataset),
953
- expected_output_cols_type = "array",
954
- n_neighbors = n_neighbors,
924
+ drop_input_cols = self._drop_input_cols,
925
+ expected_output_cols_type="array",
926
+ n_neighbors = n_neighbors,
955
927
  return_distance = return_distance
956
928
  )
957
929
  elif isinstance(dataset, pd.DataFrame):
@@ -280,18 +280,24 @@ class VotingRegressor(BaseTransformer):
280
280
  self._get_model_signatures(dataset)
281
281
  return self
282
282
 
283
- def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
284
- if self._drop_input_cols:
285
- return []
286
- else:
287
- return list(set(dataset.columns) - set(self.output_cols))
288
-
289
283
  def _batch_inference_validate_snowpark(
290
284
  self,
291
285
  dataset: DataFrame,
292
286
  inference_method: str,
293
287
  ) -> List[str]:
294
- """Util method to run validate that batch inference can be run on a snowpark dataframe.
288
+ """Util method to run validate that batch inference can be run on a snowpark dataframe and
289
+ return the available package that exists in the snowflake anaconda channel
290
+
291
+ Args:
292
+ dataset: snowpark dataframe
293
+ inference_method: the inference method such as predict, score...
294
+
295
+ Raises:
296
+ SnowflakeMLException: If the estimator is not fitted, raise error
297
+ SnowflakeMLException: If the session is None, raise error
298
+
299
+ Returns:
300
+ A list of available package that exists in the snowflake anaconda channel
295
301
  """
296
302
  if not self._is_fitted:
297
303
  raise exceptions.SnowflakeMLException(
@@ -365,7 +371,7 @@ class VotingRegressor(BaseTransformer):
365
371
  transform_kwargs = dict(
366
372
  session = dataset._session,
367
373
  dependencies = self._deps,
368
- pass_through_cols = self._get_pass_through_columns(dataset),
374
+ drop_input_cols = self._drop_input_cols,
369
375
  expected_output_cols_type = expected_type_inferred,
370
376
  )
371
377
 
@@ -427,16 +433,16 @@ class VotingRegressor(BaseTransformer):
427
433
  # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
428
434
  # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
429
435
  # each row containing a list of values.
430
- expected_dtype = "ARRAY"
436
+ expected_dtype = "array"
431
437
 
432
438
  # If we were unable to assign a type to this transform in the factory, infer the type here.
433
439
  if expected_dtype == "":
434
- # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "ARRAY"
440
+ # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
435
441
  if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
436
- expected_dtype = "ARRAY"
437
- # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "ARRAY"
442
+ expected_dtype = "array"
443
+ # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
438
444
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
439
- expected_dtype = "ARRAY"
445
+ expected_dtype = "array"
440
446
  else:
441
447
  output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
442
448
  # We can only infer the output types from the input types if the following two statemetns are true:
@@ -454,7 +460,7 @@ class VotingRegressor(BaseTransformer):
454
460
  transform_kwargs = dict(
455
461
  session = dataset._session,
456
462
  dependencies = self._deps,
457
- pass_through_cols = self._get_pass_through_columns(dataset),
463
+ drop_input_cols = self._drop_input_cols,
458
464
  expected_output_cols_type = expected_dtype,
459
465
  )
460
466
 
@@ -505,7 +511,7 @@ class VotingRegressor(BaseTransformer):
505
511
  subproject=_SUBPROJECT,
506
512
  )
507
513
  output_result, fitted_estimator = model_trainer.train_fit_predict(
508
- pass_through_columns=self._get_pass_through_columns(dataset),
514
+ drop_input_cols=self._drop_input_cols,
509
515
  expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
510
516
  )
511
517
  self._sklearn_object = fitted_estimator
@@ -523,44 +529,6 @@ class VotingRegressor(BaseTransformer):
523
529
  assert self._sklearn_object is not None
524
530
  return self._sklearn_object.embedding_
525
531
 
526
-
527
- def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
528
- """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
529
- Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
530
- """
531
- output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
532
- if output_cols:
533
- output_cols = [
534
- identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
535
- for c in output_cols
536
- ]
537
- elif getattr(self._sklearn_object, "classes_", None) is None:
538
- output_cols = [output_cols_prefix]
539
- elif self._sklearn_object is not None:
540
- classes = self._sklearn_object.classes_
541
- if isinstance(classes, numpy.ndarray):
542
- output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
543
- elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
544
- # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
545
- output_cols = []
546
- for i, cl in enumerate(classes):
547
- # For binary classification, there is only one output column for each class
548
- # ndarray as the two classes are complementary.
549
- if len(cl) == 2:
550
- output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
551
- else:
552
- output_cols.extend([
553
- f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
554
- ])
555
- else:
556
- output_cols = []
557
-
558
- # Make sure column names are valid snowflake identifiers.
559
- assert output_cols is not None # Make MyPy happy
560
- rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
561
-
562
- return rv
563
-
564
532
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
565
533
  @telemetry.send_api_usage_telemetry(
566
534
  project=_PROJECT,
@@ -600,7 +568,7 @@ class VotingRegressor(BaseTransformer):
600
568
  transform_kwargs = dict(
601
569
  session=dataset._session,
602
570
  dependencies=self._deps,
603
- pass_through_cols=self._get_pass_through_columns(dataset),
571
+ drop_input_cols = self._drop_input_cols,
604
572
  expected_output_cols_type="float",
605
573
  )
606
574
 
@@ -665,7 +633,7 @@ class VotingRegressor(BaseTransformer):
665
633
  transform_kwargs = dict(
666
634
  session=dataset._session,
667
635
  dependencies=self._deps,
668
- pass_through_cols=self._get_pass_through_columns(dataset),
636
+ drop_input_cols = self._drop_input_cols,
669
637
  expected_output_cols_type="float",
670
638
  )
671
639
  elif isinstance(dataset, pd.DataFrame):
@@ -726,7 +694,7 @@ class VotingRegressor(BaseTransformer):
726
694
  transform_kwargs = dict(
727
695
  session=dataset._session,
728
696
  dependencies=self._deps,
729
- pass_through_cols=self._get_pass_through_columns(dataset),
697
+ drop_input_cols = self._drop_input_cols,
730
698
  expected_output_cols_type="float",
731
699
  )
732
700
 
@@ -791,7 +759,7 @@ class VotingRegressor(BaseTransformer):
791
759
  transform_kwargs = dict(
792
760
  session=dataset._session,
793
761
  dependencies=self._deps,
794
- pass_through_cols=self._get_pass_through_columns(dataset),
762
+ drop_input_cols = self._drop_input_cols,
795
763
  expected_output_cols_type="float",
796
764
  )
797
765
 
@@ -847,13 +815,17 @@ class VotingRegressor(BaseTransformer):
847
815
  transform_kwargs: ScoreKwargsTypedDict = dict()
848
816
 
849
817
  if isinstance(dataset, DataFrame):
818
+ self._deps = self._batch_inference_validate_snowpark(
819
+ dataset=dataset,
820
+ inference_method="score",
821
+ )
850
822
  selected_cols = self._get_active_columns()
851
823
  if len(selected_cols) > 0:
852
824
  dataset = dataset.select(selected_cols)
853
825
  assert isinstance(dataset._session, Session) # keep mypy happy
854
826
  transform_kwargs = dict(
855
827
  session=dataset._session,
856
- dependencies=["snowflake-snowpark-python"] + self._get_dependencies(),
828
+ dependencies=["snowflake-snowpark-python"] + self._deps,
857
829
  score_sproc_imports=['sklearn'],
858
830
  )
859
831
  elif isinstance(dataset, pd.DataFrame):
@@ -927,9 +899,9 @@ class VotingRegressor(BaseTransformer):
927
899
  transform_kwargs = dict(
928
900
  session = dataset._session,
929
901
  dependencies = self._deps,
930
- pass_through_cols = self._get_pass_through_columns(dataset),
931
- expected_output_cols_type = "array",
932
- n_neighbors = n_neighbors,
902
+ drop_input_cols = self._drop_input_cols,
903
+ expected_output_cols_type="array",
904
+ n_neighbors = n_neighbors,
933
905
  return_distance = return_distance
934
906
  )
935
907
  elif isinstance(dataset, pd.DataFrame):