snowflake-ml-python 1.6.1__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. snowflake/ml/_internal/telemetry.py +142 -20
  2. snowflake/ml/_internal/utils/identifier.py +48 -11
  3. snowflake/ml/_internal/utils/snowflake_env.py +23 -13
  4. snowflake/ml/_internal/utils/sql_identifier.py +1 -1
  5. snowflake/ml/_internal/utils/table_manager.py +19 -1
  6. snowflake/ml/_internal/utils/uri.py +2 -2
  7. snowflake/ml/data/data_connector.py +33 -7
  8. snowflake/ml/data/torch_utils.py +68 -0
  9. snowflake/ml/dataset/dataset.py +1 -3
  10. snowflake/ml/feature_store/feature_store.py +41 -17
  11. snowflake/ml/feature_store/feature_view.py +2 -2
  12. snowflake/ml/fileset/embedded_stage_fs.py +1 -1
  13. snowflake/ml/fileset/fileset.py +1 -1
  14. snowflake/ml/fileset/sfcfs.py +9 -3
  15. snowflake/ml/model/_client/model/model_version_impl.py +22 -7
  16. snowflake/ml/model/_client/ops/model_ops.py +39 -3
  17. snowflake/ml/model/_client/ops/service_ops.py +198 -7
  18. snowflake/ml/model/_client/service/model_deployment_spec.py +4 -5
  19. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -2
  20. snowflake/ml/model/_client/sql/service.py +85 -18
  21. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +1 -1
  22. snowflake/ml/model/_deploy_client/snowservice/deploy.py +3 -3
  23. snowflake/ml/model/_model_composer/model_composer.py +2 -0
  24. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +3 -8
  25. snowflake/ml/model/_packager/model_handlers/_utils.py +46 -14
  26. snowflake/ml/model/_packager/model_handlers/catboost.py +17 -15
  27. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +23 -15
  28. snowflake/ml/model/_packager/model_handlers/lightgbm.py +15 -57
  29. snowflake/ml/model/_packager/model_handlers/llm.py +4 -2
  30. snowflake/ml/model/_packager/model_handlers/model_objective_utils.py +116 -0
  31. snowflake/ml/model/_packager/model_handlers/sklearn.py +36 -24
  32. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +119 -6
  33. snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
  34. snowflake/ml/model/_packager/model_handlers/xgboost.py +48 -48
  35. snowflake/ml/model/_packager/model_meta/model_meta.py +10 -7
  36. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +0 -8
  37. snowflake/ml/model/_packager/model_packager.py +2 -0
  38. snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
  39. snowflake/ml/model/_signatures/utils.py +9 -0
  40. snowflake/ml/model/models/llm.py +3 -1
  41. snowflake/ml/model/type_hints.py +9 -1
  42. snowflake/ml/modeling/_internal/constants.py +1 -0
  43. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +5 -5
  44. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +9 -6
  45. snowflake/ml/modeling/_internal/model_specifications.py +2 -0
  46. snowflake/ml/modeling/_internal/model_trainer.py +1 -0
  47. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
  48. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
  49. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +113 -160
  50. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +60 -21
  51. snowflake/ml/modeling/cluster/affinity_propagation.py +60 -21
  52. snowflake/ml/modeling/cluster/agglomerative_clustering.py +60 -21
  53. snowflake/ml/modeling/cluster/birch.py +60 -21
  54. snowflake/ml/modeling/cluster/bisecting_k_means.py +60 -21
  55. snowflake/ml/modeling/cluster/dbscan.py +60 -21
  56. snowflake/ml/modeling/cluster/feature_agglomeration.py +60 -21
  57. snowflake/ml/modeling/cluster/k_means.py +60 -21
  58. snowflake/ml/modeling/cluster/mean_shift.py +60 -21
  59. snowflake/ml/modeling/cluster/mini_batch_k_means.py +60 -21
  60. snowflake/ml/modeling/cluster/optics.py +60 -21
  61. snowflake/ml/modeling/cluster/spectral_biclustering.py +60 -21
  62. snowflake/ml/modeling/cluster/spectral_clustering.py +60 -21
  63. snowflake/ml/modeling/cluster/spectral_coclustering.py +60 -21
  64. snowflake/ml/modeling/compose/column_transformer.py +60 -21
  65. snowflake/ml/modeling/compose/transformed_target_regressor.py +60 -21
  66. snowflake/ml/modeling/covariance/elliptic_envelope.py +60 -21
  67. snowflake/ml/modeling/covariance/empirical_covariance.py +60 -21
  68. snowflake/ml/modeling/covariance/graphical_lasso.py +60 -21
  69. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +60 -21
  70. snowflake/ml/modeling/covariance/ledoit_wolf.py +60 -21
  71. snowflake/ml/modeling/covariance/min_cov_det.py +60 -21
  72. snowflake/ml/modeling/covariance/oas.py +60 -21
  73. snowflake/ml/modeling/covariance/shrunk_covariance.py +60 -21
  74. snowflake/ml/modeling/decomposition/dictionary_learning.py +60 -21
  75. snowflake/ml/modeling/decomposition/factor_analysis.py +60 -21
  76. snowflake/ml/modeling/decomposition/fast_ica.py +60 -21
  77. snowflake/ml/modeling/decomposition/incremental_pca.py +60 -21
  78. snowflake/ml/modeling/decomposition/kernel_pca.py +60 -21
  79. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +60 -21
  80. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +60 -21
  81. snowflake/ml/modeling/decomposition/pca.py +60 -21
  82. snowflake/ml/modeling/decomposition/sparse_pca.py +60 -21
  83. snowflake/ml/modeling/decomposition/truncated_svd.py +60 -21
  84. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +60 -21
  85. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +60 -21
  86. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +60 -21
  87. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +60 -21
  88. snowflake/ml/modeling/ensemble/bagging_classifier.py +60 -21
  89. snowflake/ml/modeling/ensemble/bagging_regressor.py +60 -21
  90. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +60 -21
  91. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +60 -21
  92. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +60 -21
  93. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +60 -21
  94. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +60 -21
  95. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +60 -21
  96. snowflake/ml/modeling/ensemble/isolation_forest.py +60 -21
  97. snowflake/ml/modeling/ensemble/random_forest_classifier.py +60 -21
  98. snowflake/ml/modeling/ensemble/random_forest_regressor.py +60 -21
  99. snowflake/ml/modeling/ensemble/stacking_regressor.py +60 -21
  100. snowflake/ml/modeling/ensemble/voting_classifier.py +60 -21
  101. snowflake/ml/modeling/ensemble/voting_regressor.py +60 -21
  102. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +60 -21
  103. snowflake/ml/modeling/feature_selection/select_fdr.py +60 -21
  104. snowflake/ml/modeling/feature_selection/select_fpr.py +60 -21
  105. snowflake/ml/modeling/feature_selection/select_fwe.py +60 -21
  106. snowflake/ml/modeling/feature_selection/select_k_best.py +60 -21
  107. snowflake/ml/modeling/feature_selection/select_percentile.py +60 -21
  108. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +60 -21
  109. snowflake/ml/modeling/feature_selection/variance_threshold.py +60 -21
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +60 -21
  111. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +60 -21
  112. snowflake/ml/modeling/impute/iterative_imputer.py +60 -21
  113. snowflake/ml/modeling/impute/knn_imputer.py +60 -21
  114. snowflake/ml/modeling/impute/missing_indicator.py +60 -21
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +60 -21
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +60 -21
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +60 -21
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +60 -21
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +60 -21
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +60 -21
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +60 -21
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +60 -21
  123. snowflake/ml/modeling/linear_model/ard_regression.py +60 -21
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +60 -21
  125. snowflake/ml/modeling/linear_model/elastic_net.py +60 -21
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +60 -21
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +60 -21
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +60 -21
  129. snowflake/ml/modeling/linear_model/lars.py +60 -21
  130. snowflake/ml/modeling/linear_model/lars_cv.py +60 -21
  131. snowflake/ml/modeling/linear_model/lasso.py +60 -21
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +60 -21
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +60 -21
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +60 -21
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +60 -21
  136. snowflake/ml/modeling/linear_model/linear_regression.py +60 -21
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +60 -21
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +60 -21
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +60 -21
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +60 -21
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +60 -21
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +60 -21
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +60 -21
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +60 -21
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +60 -21
  146. snowflake/ml/modeling/linear_model/perceptron.py +60 -21
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +60 -21
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +60 -21
  149. snowflake/ml/modeling/linear_model/ridge.py +60 -21
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +60 -21
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +60 -21
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +60 -21
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +60 -21
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +60 -21
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +60 -21
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +60 -21
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +60 -21
  158. snowflake/ml/modeling/manifold/isomap.py +60 -21
  159. snowflake/ml/modeling/manifold/mds.py +60 -21
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +60 -21
  161. snowflake/ml/modeling/manifold/tsne.py +60 -21
  162. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +60 -21
  163. snowflake/ml/modeling/mixture/gaussian_mixture.py +60 -21
  164. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +60 -21
  165. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +60 -21
  166. snowflake/ml/modeling/multiclass/output_code_classifier.py +60 -21
  167. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +60 -21
  168. snowflake/ml/modeling/naive_bayes/categorical_nb.py +60 -21
  169. snowflake/ml/modeling/naive_bayes/complement_nb.py +60 -21
  170. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +60 -21
  171. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +60 -21
  172. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +60 -21
  173. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +60 -21
  174. snowflake/ml/modeling/neighbors/kernel_density.py +60 -21
  175. snowflake/ml/modeling/neighbors/local_outlier_factor.py +60 -21
  176. snowflake/ml/modeling/neighbors/nearest_centroid.py +60 -21
  177. snowflake/ml/modeling/neighbors/nearest_neighbors.py +60 -21
  178. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +60 -21
  179. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +60 -21
  180. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +60 -21
  181. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +60 -21
  182. snowflake/ml/modeling/neural_network/mlp_classifier.py +60 -21
  183. snowflake/ml/modeling/neural_network/mlp_regressor.py +60 -21
  184. snowflake/ml/modeling/parameters/disable_model_tracer.py +5 -0
  185. snowflake/ml/modeling/pipeline/pipeline.py +1 -12
  186. snowflake/ml/modeling/preprocessing/polynomial_features.py +60 -21
  187. snowflake/ml/modeling/semi_supervised/label_propagation.py +60 -21
  188. snowflake/ml/modeling/semi_supervised/label_spreading.py +60 -21
  189. snowflake/ml/modeling/svm/linear_svc.py +60 -21
  190. snowflake/ml/modeling/svm/linear_svr.py +60 -21
  191. snowflake/ml/modeling/svm/nu_svc.py +60 -21
  192. snowflake/ml/modeling/svm/nu_svr.py +60 -21
  193. snowflake/ml/modeling/svm/svc.py +60 -21
  194. snowflake/ml/modeling/svm/svr.py +60 -21
  195. snowflake/ml/modeling/tree/decision_tree_classifier.py +60 -21
  196. snowflake/ml/modeling/tree/decision_tree_regressor.py +60 -21
  197. snowflake/ml/modeling/tree/extra_tree_classifier.py +60 -21
  198. snowflake/ml/modeling/tree/extra_tree_regressor.py +60 -21
  199. snowflake/ml/modeling/xgboost/xgb_classifier.py +63 -23
  200. snowflake/ml/modeling/xgboost/xgb_regressor.py +63 -23
  201. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +63 -23
  202. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +63 -23
  203. snowflake/ml/registry/_manager/model_manager.py +4 -0
  204. snowflake/ml/registry/model_registry.py +1 -1
  205. snowflake/ml/registry/registry.py +1 -2
  206. snowflake/ml/version.py +1 -1
  207. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/METADATA +23 -4
  208. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/RECORD +211 -209
  209. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/WHEEL +1 -1
  210. snowflake/ml/data/torch_dataset.py +0 -33
  211. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/LICENSE.txt +0 -0
  212. {snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/top_level.txt +0 -0
@@ -4,14 +4,12 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
@@ -24,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
24
22
  from snowflake.ml._internal import telemetry
25
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
26
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
27
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
28
26
  from snowflake.snowpark import DataFrame, Session
29
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
31
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
32
- ModelTransformHandlers,
33
30
  BatchInferenceKwargsTypedDict,
34
31
  ScoreKwargsTypedDict
35
32
  )
@@ -573,12 +570,23 @@ class Perceptron(BaseTransformer):
573
570
  autogenerated=self._autogenerated,
574
571
  subproject=_SUBPROJECT,
575
572
  )
576
- output_result, fitted_estimator = model_trainer.train_fit_predict(
577
- drop_input_cols=self._drop_input_cols,
578
- expected_output_cols_list=(
579
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
580
- ),
573
+ expected_output_cols = (
574
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
581
575
  )
576
+ if isinstance(dataset, DataFrame):
577
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
578
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
579
+ )
580
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
581
+ drop_input_cols=self._drop_input_cols,
582
+ expected_output_cols_list=expected_output_cols,
583
+ example_output_pd_df=example_output_pd_df,
584
+ )
585
+ else:
586
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
587
+ drop_input_cols=self._drop_input_cols,
588
+ expected_output_cols_list=expected_output_cols,
589
+ )
582
590
  self._sklearn_object = fitted_estimator
583
591
  self._is_fitted = True
584
592
  return output_result
@@ -657,12 +665,41 @@ class Perceptron(BaseTransformer):
657
665
 
658
666
  return rv
659
667
 
660
- def _align_expected_output_names(
661
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
662
- ) -> List[str]:
668
+ def _align_expected_output(
669
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
670
+ ) -> Tuple[List[str], pd.DataFrame]:
671
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
672
+ and output dataframe with 1 line.
673
+ If the method is fit_predict, run 2 lines of data.
674
+ """
663
675
  # in case the inferred output column names dimension is different
664
676
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
665
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
677
+
678
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
679
+ # so change the minimum of number of rows to 2
680
+ num_examples = 2
681
+ statement_params = telemetry.get_function_usage_statement_params(
682
+ project=_PROJECT,
683
+ subproject=_SUBPROJECT,
684
+ function_name=telemetry.get_statement_params_full_func_name(
685
+ inspect.currentframe(), Perceptron.__class__.__name__
686
+ ),
687
+ api_calls=[Session.call],
688
+ custom_tags={"autogen": True} if self._autogenerated else None,
689
+ )
690
+ if output_cols_prefix == "fit_predict_":
691
+ if hasattr(self._sklearn_object, "n_clusters"):
692
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
693
+ num_examples = self._sklearn_object.n_clusters
694
+ elif hasattr(self._sklearn_object, "min_samples"):
695
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
696
+ num_examples = self._sklearn_object.min_samples
697
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
698
+ # LocalOutlierFactor expects n_neighbors <= n_samples
699
+ num_examples = self._sklearn_object.n_neighbors
700
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
701
+ else:
702
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
666
703
 
667
704
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
668
705
  # seen during the fit.
@@ -674,12 +711,14 @@ class Perceptron(BaseTransformer):
674
711
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
675
712
  if self.sample_weight_col:
676
713
  output_df_columns_set -= set(self.sample_weight_col)
714
+
677
715
  # if the dimension of inferred output column names is correct; use it
678
716
  if len(expected_output_cols_list) == len(output_df_columns_set):
679
- return expected_output_cols_list
717
+ return expected_output_cols_list, output_df_pd
680
718
  # otherwise, use the sklearn estimator's output
681
719
  else:
682
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
720
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
721
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
683
722
 
684
723
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
685
724
  @telemetry.send_api_usage_telemetry(
@@ -725,7 +764,7 @@ class Perceptron(BaseTransformer):
725
764
  drop_input_cols=self._drop_input_cols,
726
765
  expected_output_cols_type="float",
727
766
  )
728
- expected_output_cols = self._align_expected_output_names(
767
+ expected_output_cols, _ = self._align_expected_output(
729
768
  inference_method, dataset, expected_output_cols, output_cols_prefix
730
769
  )
731
770
 
@@ -791,7 +830,7 @@ class Perceptron(BaseTransformer):
791
830
  drop_input_cols=self._drop_input_cols,
792
831
  expected_output_cols_type="float",
793
832
  )
794
- expected_output_cols = self._align_expected_output_names(
833
+ expected_output_cols, _ = self._align_expected_output(
795
834
  inference_method, dataset, expected_output_cols, output_cols_prefix
796
835
  )
797
836
  elif isinstance(dataset, pd.DataFrame):
@@ -856,7 +895,7 @@ class Perceptron(BaseTransformer):
856
895
  drop_input_cols=self._drop_input_cols,
857
896
  expected_output_cols_type="float",
858
897
  )
859
- expected_output_cols = self._align_expected_output_names(
898
+ expected_output_cols, _ = self._align_expected_output(
860
899
  inference_method, dataset, expected_output_cols, output_cols_prefix
861
900
  )
862
901
 
@@ -921,7 +960,7 @@ class Perceptron(BaseTransformer):
921
960
  drop_input_cols = self._drop_input_cols,
922
961
  expected_output_cols_type="float",
923
962
  )
924
- expected_output_cols = self._align_expected_output_names(
963
+ expected_output_cols, _ = self._align_expected_output(
925
964
  inference_method, dataset, expected_output_cols, output_cols_prefix
926
965
  )
927
966
 
@@ -4,14 +4,12 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
@@ -24,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
24
22
  from snowflake.ml._internal import telemetry
25
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
26
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
27
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
28
26
  from snowflake.snowpark import DataFrame, Session
29
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
31
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
32
- ModelTransformHandlers,
33
30
  BatchInferenceKwargsTypedDict,
34
31
  ScoreKwargsTypedDict
35
32
  )
@@ -522,12 +519,23 @@ class PoissonRegressor(BaseTransformer):
522
519
  autogenerated=self._autogenerated,
523
520
  subproject=_SUBPROJECT,
524
521
  )
525
- output_result, fitted_estimator = model_trainer.train_fit_predict(
526
- drop_input_cols=self._drop_input_cols,
527
- expected_output_cols_list=(
528
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
529
- ),
522
+ expected_output_cols = (
523
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
530
524
  )
525
+ if isinstance(dataset, DataFrame):
526
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
527
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
528
+ )
529
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
530
+ drop_input_cols=self._drop_input_cols,
531
+ expected_output_cols_list=expected_output_cols,
532
+ example_output_pd_df=example_output_pd_df,
533
+ )
534
+ else:
535
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
536
+ drop_input_cols=self._drop_input_cols,
537
+ expected_output_cols_list=expected_output_cols,
538
+ )
531
539
  self._sklearn_object = fitted_estimator
532
540
  self._is_fitted = True
533
541
  return output_result
@@ -606,12 +614,41 @@ class PoissonRegressor(BaseTransformer):
606
614
 
607
615
  return rv
608
616
 
609
- def _align_expected_output_names(
610
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
611
- ) -> List[str]:
617
+ def _align_expected_output(
618
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
619
+ ) -> Tuple[List[str], pd.DataFrame]:
620
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
621
+ and output dataframe with 1 line.
622
+ If the method is fit_predict, run 2 lines of data.
623
+ """
612
624
  # in case the inferred output column names dimension is different
613
625
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
614
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
626
+
627
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
628
+ # so change the minimum of number of rows to 2
629
+ num_examples = 2
630
+ statement_params = telemetry.get_function_usage_statement_params(
631
+ project=_PROJECT,
632
+ subproject=_SUBPROJECT,
633
+ function_name=telemetry.get_statement_params_full_func_name(
634
+ inspect.currentframe(), PoissonRegressor.__class__.__name__
635
+ ),
636
+ api_calls=[Session.call],
637
+ custom_tags={"autogen": True} if self._autogenerated else None,
638
+ )
639
+ if output_cols_prefix == "fit_predict_":
640
+ if hasattr(self._sklearn_object, "n_clusters"):
641
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
642
+ num_examples = self._sklearn_object.n_clusters
643
+ elif hasattr(self._sklearn_object, "min_samples"):
644
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
645
+ num_examples = self._sklearn_object.min_samples
646
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
647
+ # LocalOutlierFactor expects n_neighbors <= n_samples
648
+ num_examples = self._sklearn_object.n_neighbors
649
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
650
+ else:
651
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
615
652
 
616
653
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
617
654
  # seen during the fit.
@@ -623,12 +660,14 @@ class PoissonRegressor(BaseTransformer):
623
660
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
624
661
  if self.sample_weight_col:
625
662
  output_df_columns_set -= set(self.sample_weight_col)
663
+
626
664
  # if the dimension of inferred output column names is correct; use it
627
665
  if len(expected_output_cols_list) == len(output_df_columns_set):
628
- return expected_output_cols_list
666
+ return expected_output_cols_list, output_df_pd
629
667
  # otherwise, use the sklearn estimator's output
630
668
  else:
631
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
669
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
670
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
632
671
 
633
672
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
634
673
  @telemetry.send_api_usage_telemetry(
@@ -674,7 +713,7 @@ class PoissonRegressor(BaseTransformer):
674
713
  drop_input_cols=self._drop_input_cols,
675
714
  expected_output_cols_type="float",
676
715
  )
677
- expected_output_cols = self._align_expected_output_names(
716
+ expected_output_cols, _ = self._align_expected_output(
678
717
  inference_method, dataset, expected_output_cols, output_cols_prefix
679
718
  )
680
719
 
@@ -740,7 +779,7 @@ class PoissonRegressor(BaseTransformer):
740
779
  drop_input_cols=self._drop_input_cols,
741
780
  expected_output_cols_type="float",
742
781
  )
743
- expected_output_cols = self._align_expected_output_names(
782
+ expected_output_cols, _ = self._align_expected_output(
744
783
  inference_method, dataset, expected_output_cols, output_cols_prefix
745
784
  )
746
785
  elif isinstance(dataset, pd.DataFrame):
@@ -803,7 +842,7 @@ class PoissonRegressor(BaseTransformer):
803
842
  drop_input_cols=self._drop_input_cols,
804
843
  expected_output_cols_type="float",
805
844
  )
806
- expected_output_cols = self._align_expected_output_names(
845
+ expected_output_cols, _ = self._align_expected_output(
807
846
  inference_method, dataset, expected_output_cols, output_cols_prefix
808
847
  )
809
848
 
@@ -868,7 +907,7 @@ class PoissonRegressor(BaseTransformer):
868
907
  drop_input_cols = self._drop_input_cols,
869
908
  expected_output_cols_type="float",
870
909
  )
871
- expected_output_cols = self._align_expected_output_names(
910
+ expected_output_cols, _ = self._align_expected_output(
872
911
  inference_method, dataset, expected_output_cols, output_cols_prefix
873
912
  )
874
913
 
@@ -4,14 +4,12 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
@@ -24,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
24
22
  from snowflake.ml._internal import telemetry
25
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
26
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
27
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
28
26
  from snowflake.snowpark import DataFrame, Session
29
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
31
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
32
- ModelTransformHandlers,
33
30
  BatchInferenceKwargsTypedDict,
34
31
  ScoreKwargsTypedDict
35
32
  )
@@ -578,12 +575,23 @@ class RANSACRegressor(BaseTransformer):
578
575
  autogenerated=self._autogenerated,
579
576
  subproject=_SUBPROJECT,
580
577
  )
581
- output_result, fitted_estimator = model_trainer.train_fit_predict(
582
- drop_input_cols=self._drop_input_cols,
583
- expected_output_cols_list=(
584
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
585
- ),
578
+ expected_output_cols = (
579
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
586
580
  )
581
+ if isinstance(dataset, DataFrame):
582
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
583
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
584
+ )
585
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
586
+ drop_input_cols=self._drop_input_cols,
587
+ expected_output_cols_list=expected_output_cols,
588
+ example_output_pd_df=example_output_pd_df,
589
+ )
590
+ else:
591
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
592
+ drop_input_cols=self._drop_input_cols,
593
+ expected_output_cols_list=expected_output_cols,
594
+ )
587
595
  self._sklearn_object = fitted_estimator
588
596
  self._is_fitted = True
589
597
  return output_result
@@ -662,12 +670,41 @@ class RANSACRegressor(BaseTransformer):
662
670
 
663
671
  return rv
664
672
 
665
- def _align_expected_output_names(
666
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
667
- ) -> List[str]:
673
+ def _align_expected_output(
674
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
675
+ ) -> Tuple[List[str], pd.DataFrame]:
676
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
677
+ and output dataframe with 1 line.
678
+ If the method is fit_predict, run 2 lines of data.
679
+ """
668
680
  # in case the inferred output column names dimension is different
669
681
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
670
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
682
+
683
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
684
+ # so change the minimum of number of rows to 2
685
+ num_examples = 2
686
+ statement_params = telemetry.get_function_usage_statement_params(
687
+ project=_PROJECT,
688
+ subproject=_SUBPROJECT,
689
+ function_name=telemetry.get_statement_params_full_func_name(
690
+ inspect.currentframe(), RANSACRegressor.__class__.__name__
691
+ ),
692
+ api_calls=[Session.call],
693
+ custom_tags={"autogen": True} if self._autogenerated else None,
694
+ )
695
+ if output_cols_prefix == "fit_predict_":
696
+ if hasattr(self._sklearn_object, "n_clusters"):
697
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
698
+ num_examples = self._sklearn_object.n_clusters
699
+ elif hasattr(self._sklearn_object, "min_samples"):
700
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
701
+ num_examples = self._sklearn_object.min_samples
702
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
703
+ # LocalOutlierFactor expects n_neighbors <= n_samples
704
+ num_examples = self._sklearn_object.n_neighbors
705
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
706
+ else:
707
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
671
708
 
672
709
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
673
710
  # seen during the fit.
@@ -679,12 +716,14 @@ class RANSACRegressor(BaseTransformer):
679
716
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
680
717
  if self.sample_weight_col:
681
718
  output_df_columns_set -= set(self.sample_weight_col)
719
+
682
720
  # if the dimension of inferred output column names is correct; use it
683
721
  if len(expected_output_cols_list) == len(output_df_columns_set):
684
- return expected_output_cols_list
722
+ return expected_output_cols_list, output_df_pd
685
723
  # otherwise, use the sklearn estimator's output
686
724
  else:
687
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
725
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
726
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
688
727
 
689
728
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
690
729
  @telemetry.send_api_usage_telemetry(
@@ -730,7 +769,7 @@ class RANSACRegressor(BaseTransformer):
730
769
  drop_input_cols=self._drop_input_cols,
731
770
  expected_output_cols_type="float",
732
771
  )
733
- expected_output_cols = self._align_expected_output_names(
772
+ expected_output_cols, _ = self._align_expected_output(
734
773
  inference_method, dataset, expected_output_cols, output_cols_prefix
735
774
  )
736
775
 
@@ -796,7 +835,7 @@ class RANSACRegressor(BaseTransformer):
796
835
  drop_input_cols=self._drop_input_cols,
797
836
  expected_output_cols_type="float",
798
837
  )
799
- expected_output_cols = self._align_expected_output_names(
838
+ expected_output_cols, _ = self._align_expected_output(
800
839
  inference_method, dataset, expected_output_cols, output_cols_prefix
801
840
  )
802
841
  elif isinstance(dataset, pd.DataFrame):
@@ -859,7 +898,7 @@ class RANSACRegressor(BaseTransformer):
859
898
  drop_input_cols=self._drop_input_cols,
860
899
  expected_output_cols_type="float",
861
900
  )
862
- expected_output_cols = self._align_expected_output_names(
901
+ expected_output_cols, _ = self._align_expected_output(
863
902
  inference_method, dataset, expected_output_cols, output_cols_prefix
864
903
  )
865
904
 
@@ -924,7 +963,7 @@ class RANSACRegressor(BaseTransformer):
924
963
  drop_input_cols = self._drop_input_cols,
925
964
  expected_output_cols_type="float",
926
965
  )
927
- expected_output_cols = self._align_expected_output_names(
966
+ expected_output_cols, _ = self._align_expected_output(
928
967
  inference_method, dataset, expected_output_cols, output_cols_prefix
929
968
  )
930
969
 
@@ -4,14 +4,12 @@
4
4
  #
5
5
  import inspect
6
6
  import os
7
- import posixpath
8
- from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
9
- from typing_extensions import TypeGuard
7
+ from typing import Iterable, Optional, Union, List, Any, Dict, Set, Tuple
10
8
  from uuid import uuid4
11
9
 
12
10
  import cloudpickle as cp
13
- import pandas as pd
14
11
  import numpy as np
12
+ import pandas as pd
15
13
  from numpy import typing as npt
16
14
 
17
15
 
@@ -24,12 +22,11 @@ from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
24
22
  from snowflake.ml._internal import telemetry
25
23
  from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
26
24
  from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
27
- from snowflake.ml._internal.utils import pkg_version_utils, identifier
25
+ from snowflake.ml._internal.utils import identifier
28
26
  from snowflake.snowpark import DataFrame, Session
29
27
  from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
28
  from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
31
29
  from snowflake.ml.modeling._internal.transformer_protocols import (
32
- ModelTransformHandlers,
33
30
  BatchInferenceKwargsTypedDict,
34
31
  ScoreKwargsTypedDict
35
32
  )
@@ -570,12 +567,23 @@ class Ridge(BaseTransformer):
570
567
  autogenerated=self._autogenerated,
571
568
  subproject=_SUBPROJECT,
572
569
  )
573
- output_result, fitted_estimator = model_trainer.train_fit_predict(
574
- drop_input_cols=self._drop_input_cols,
575
- expected_output_cols_list=(
576
- self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
577
- ),
570
+ expected_output_cols = (
571
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
578
572
  )
573
+ if isinstance(dataset, DataFrame):
574
+ expected_output_cols, example_output_pd_df = self._align_expected_output(
575
+ "fit_predict", dataset, expected_output_cols, output_cols_prefix
576
+ )
577
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
578
+ drop_input_cols=self._drop_input_cols,
579
+ expected_output_cols_list=expected_output_cols,
580
+ example_output_pd_df=example_output_pd_df,
581
+ )
582
+ else:
583
+ output_result, fitted_estimator = model_trainer.train_fit_predict(
584
+ drop_input_cols=self._drop_input_cols,
585
+ expected_output_cols_list=expected_output_cols,
586
+ )
579
587
  self._sklearn_object = fitted_estimator
580
588
  self._is_fitted = True
581
589
  return output_result
@@ -654,12 +662,41 @@ class Ridge(BaseTransformer):
654
662
 
655
663
  return rv
656
664
 
657
- def _align_expected_output_names(
658
- self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
659
- ) -> List[str]:
665
+ def _align_expected_output(
666
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str,
667
+ ) -> Tuple[List[str], pd.DataFrame]:
668
+ """ Run 1 line of data with the desired method, and return one tuple that consists of the output column names
669
+ and output dataframe with 1 line.
670
+ If the method is fit_predict, run 2 lines of data.
671
+ """
660
672
  # in case the inferred output column names dimension is different
661
673
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
662
- sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
674
+
675
+ # For fit_predict method, a minimum of 2 is required by MinCovDet, BayesianGaussianMixture
676
+ # so change the minimum of number of rows to 2
677
+ num_examples = 2
678
+ statement_params = telemetry.get_function_usage_statement_params(
679
+ project=_PROJECT,
680
+ subproject=_SUBPROJECT,
681
+ function_name=telemetry.get_statement_params_full_func_name(
682
+ inspect.currentframe(), Ridge.__class__.__name__
683
+ ),
684
+ api_calls=[Session.call],
685
+ custom_tags={"autogen": True} if self._autogenerated else None,
686
+ )
687
+ if output_cols_prefix == "fit_predict_":
688
+ if hasattr(self._sklearn_object, "n_clusters"):
689
+ # cluster classes such as BisectingKMeansTest requires # of examples >= n_clusters
690
+ num_examples = self._sklearn_object.n_clusters
691
+ elif hasattr(self._sklearn_object, "min_samples"):
692
+ # OPTICS default min_samples 5, which requires at least 5 lines of data
693
+ num_examples = self._sklearn_object.min_samples
694
+ elif hasattr(self._sklearn_object, "n_neighbors") and hasattr(self._sklearn_object, "n_samples"):
695
+ # LocalOutlierFactor expects n_neighbors <= n_samples
696
+ num_examples = self._sklearn_object.n_neighbors
697
+ sample_pd_df = dataset.select(self.input_cols).limit(num_examples).to_pandas(statement_params=statement_params)
698
+ else:
699
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas(statement_params=statement_params)
663
700
 
664
701
  # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
665
702
  # seen during the fit.
@@ -671,12 +708,14 @@ class Ridge(BaseTransformer):
671
708
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
672
709
  if self.sample_weight_col:
673
710
  output_df_columns_set -= set(self.sample_weight_col)
711
+
674
712
  # if the dimension of inferred output column names is correct; use it
675
713
  if len(expected_output_cols_list) == len(output_df_columns_set):
676
- return expected_output_cols_list
714
+ return expected_output_cols_list, output_df_pd
677
715
  # otherwise, use the sklearn estimator's output
678
716
  else:
679
- return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
717
+ expected_output_cols_list = sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
718
+ return expected_output_cols_list, output_df_pd[expected_output_cols_list]
680
719
 
681
720
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
682
721
  @telemetry.send_api_usage_telemetry(
@@ -722,7 +761,7 @@ class Ridge(BaseTransformer):
722
761
  drop_input_cols=self._drop_input_cols,
723
762
  expected_output_cols_type="float",
724
763
  )
725
- expected_output_cols = self._align_expected_output_names(
764
+ expected_output_cols, _ = self._align_expected_output(
726
765
  inference_method, dataset, expected_output_cols, output_cols_prefix
727
766
  )
728
767
 
@@ -788,7 +827,7 @@ class Ridge(BaseTransformer):
788
827
  drop_input_cols=self._drop_input_cols,
789
828
  expected_output_cols_type="float",
790
829
  )
791
- expected_output_cols = self._align_expected_output_names(
830
+ expected_output_cols, _ = self._align_expected_output(
792
831
  inference_method, dataset, expected_output_cols, output_cols_prefix
793
832
  )
794
833
  elif isinstance(dataset, pd.DataFrame):
@@ -851,7 +890,7 @@ class Ridge(BaseTransformer):
851
890
  drop_input_cols=self._drop_input_cols,
852
891
  expected_output_cols_type="float",
853
892
  )
854
- expected_output_cols = self._align_expected_output_names(
893
+ expected_output_cols, _ = self._align_expected_output(
855
894
  inference_method, dataset, expected_output_cols, output_cols_prefix
856
895
  )
857
896
 
@@ -916,7 +955,7 @@ class Ridge(BaseTransformer):
916
955
  drop_input_cols = self._drop_input_cols,
917
956
  expected_output_cols_type="float",
918
957
  )
919
- expected_output_cols = self._align_expected_output_names(
958
+ expected_output_cols, _ = self._align_expected_output(
920
959
  inference_method, dataset, expected_output_cols, output_cols_prefix
921
960
  )
922
961