snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (197)
  1. snowflake/cortex/_sentiment.py +7 -4
  2. snowflake/ml/_internal/env_utils.py +6 -0
  3. snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
  4. snowflake/ml/_internal/telemetry.py +1 -0
  5. snowflake/ml/_internal/utils/identifier.py +1 -1
  6. snowflake/ml/_internal/utils/sql_identifier.py +14 -1
  7. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  8. snowflake/ml/dataset/__init__.py +2 -1
  9. snowflake/ml/dataset/dataset.py +4 -3
  10. snowflake/ml/dataset/dataset_reader.py +5 -8
  11. snowflake/ml/feature_store/__init__.py +6 -0
  12. snowflake/ml/feature_store/access_manager.py +283 -0
  13. snowflake/ml/feature_store/feature_store.py +160 -100
  14. snowflake/ml/feature_store/feature_view.py +30 -19
  15. snowflake/ml/fileset/embedded_stage_fs.py +15 -12
  16. snowflake/ml/fileset/snowfs.py +2 -30
  17. snowflake/ml/fileset/stage_fs.py +25 -7
  18. snowflake/ml/model/_client/model/model_impl.py +46 -39
  19. snowflake/ml/model/_client/model/model_version_impl.py +24 -2
  20. snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
  21. snowflake/ml/model/_client/ops/model_ops.py +174 -16
  22. snowflake/ml/model/_client/sql/_base.py +34 -0
  23. snowflake/ml/model/_client/sql/model.py +32 -39
  24. snowflake/ml/model/_client/sql/model_version.py +111 -42
  25. snowflake/ml/model/_client/sql/stage.py +6 -32
  26. snowflake/ml/model/_client/sql/tag.py +32 -56
  27. snowflake/ml/model/_model_composer/model_composer.py +8 -4
  28. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
  29. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  30. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  31. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +90 -142
  32. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
  33. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +81 -3
  34. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
  35. snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
  36. snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
  37. snowflake/ml/modeling/cluster/birch.py +8 -1
  38. snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
  39. snowflake/ml/modeling/cluster/dbscan.py +8 -1
  40. snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
  41. snowflake/ml/modeling/cluster/k_means.py +8 -1
  42. snowflake/ml/modeling/cluster/mean_shift.py +8 -1
  43. snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
  44. snowflake/ml/modeling/cluster/optics.py +8 -1
  45. snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
  46. snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
  47. snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
  48. snowflake/ml/modeling/compose/column_transformer.py +8 -1
  49. snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
  50. snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
  51. snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
  52. snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
  53. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
  54. snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
  55. snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
  56. snowflake/ml/modeling/covariance/oas.py +8 -1
  57. snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
  58. snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
  59. snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
  60. snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
  61. snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
  62. snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
  63. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
  64. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
  65. snowflake/ml/modeling/decomposition/pca.py +8 -1
  66. snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
  67. snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
  68. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
  69. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
  70. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
  71. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
  72. snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
  73. snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
  74. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
  75. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
  76. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
  77. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
  79. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
  80. snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
  81. snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
  82. snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
  83. snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
  84. snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
  85. snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
  86. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
  87. snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
  88. snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
  89. snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
  90. snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
  91. snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
  92. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
  93. snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
  94. snowflake/ml/modeling/framework/base.py +4 -3
  95. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
  96. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
  97. snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
  98. snowflake/ml/modeling/impute/knn_imputer.py +8 -1
  99. snowflake/ml/modeling/impute/missing_indicator.py +8 -1
  100. snowflake/ml/modeling/impute/simple_imputer.py +21 -2
  101. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
  102. snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
  103. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
  104. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
  105. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
  106. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
  107. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
  108. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
  109. snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
  110. snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
  111. snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
  112. snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
  113. snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
  114. snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
  115. snowflake/ml/modeling/linear_model/lars.py +8 -1
  116. snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
  117. snowflake/ml/modeling/linear_model/lasso.py +8 -1
  118. snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
  119. snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
  120. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
  121. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
  122. snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
  123. snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
  124. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
  125. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
  126. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
  127. snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
  128. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
  129. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
  130. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
  131. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
  132. snowflake/ml/modeling/linear_model/perceptron.py +8 -1
  133. snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
  134. snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
  135. snowflake/ml/modeling/linear_model/ridge.py +8 -1
  136. snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
  137. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
  138. snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
  139. snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
  140. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
  141. snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
  142. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
  143. snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
  144. snowflake/ml/modeling/manifold/isomap.py +8 -1
  145. snowflake/ml/modeling/manifold/mds.py +8 -1
  146. snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
  147. snowflake/ml/modeling/manifold/tsne.py +8 -1
  148. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
  149. snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
  150. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
  151. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
  152. snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
  153. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
  154. snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
  155. snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
  156. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
  157. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
  158. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
  159. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
  160. snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
  161. snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
  162. snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
  163. snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
  164. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
  165. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
  166. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
  167. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
  168. snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
  169. snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
  170. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  171. snowflake/ml/modeling/pipeline/pipeline.py +27 -7
  172. snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
  173. snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
  174. snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
  175. snowflake/ml/modeling/svm/linear_svc.py +8 -1
  176. snowflake/ml/modeling/svm/linear_svr.py +8 -1
  177. snowflake/ml/modeling/svm/nu_svc.py +8 -1
  178. snowflake/ml/modeling/svm/nu_svr.py +8 -1
  179. snowflake/ml/modeling/svm/svc.py +8 -1
  180. snowflake/ml/modeling/svm/svr.py +8 -1
  181. snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
  182. snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
  183. snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
  184. snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
  185. snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
  186. snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
  187. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
  188. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
  189. snowflake/ml/registry/_manager/model_manager.py +95 -8
  190. snowflake/ml/registry/registry.py +10 -1
  191. snowflake/ml/version.py +1 -1
  192. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +66 -10
  193. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +196 -192
  194. snowflake/ml/_internal/lineage/dataset_dataframe.py +0 -44
  195. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
  196. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
  197. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -637,7 +637,14 @@ class AgglomerativeClustering(BaseTransformer):
637
637
  ) -> List[str]:
638
638
  # in case the inferred output column names dimension is different
639
639
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
640
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
640
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
641
+
642
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
643
+ # seen during the fit.
644
+ snowpark_column_names = dataset.select(self.input_cols).columns
645
+ sample_pd_df.columns = snowpark_column_names
646
+
647
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
641
648
  output_df_columns = list(output_df_pd.columns)
642
649
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
643
650
  if self.sample_weight_col:
@@ -601,7 +601,14 @@ class Birch(BaseTransformer):
601
601
  ) -> List[str]:
602
602
  # in case the inferred output column names dimension is different
603
603
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
604
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
604
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
605
+
606
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
607
+ # seen during the fit.
608
+ snowpark_column_names = dataset.select(self.input_cols).columns
609
+ sample_pd_df.columns = snowpark_column_names
610
+
611
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
605
612
  output_df_columns = list(output_df_pd.columns)
606
613
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
607
614
  if self.sample_weight_col:
@@ -650,7 +650,14 @@ class BisectingKMeans(BaseTransformer):
650
650
  ) -> List[str]:
651
651
  # in case the inferred output column names dimension is different
652
652
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
653
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
653
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
654
+
655
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
656
+ # seen during the fit.
657
+ snowpark_column_names = dataset.select(self.input_cols).columns
658
+ sample_pd_df.columns = snowpark_column_names
659
+
660
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
654
661
  output_df_columns = list(output_df_pd.columns)
655
662
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
656
663
  if self.sample_weight_col:
@@ -612,7 +612,14 @@ class DBSCAN(BaseTransformer):
612
612
  ) -> List[str]:
613
613
  # in case the inferred output column names dimension is different
614
614
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
615
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
615
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
616
+
617
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
618
+ # seen during the fit.
619
+ snowpark_column_names = dataset.select(self.input_cols).columns
620
+ sample_pd_df.columns = snowpark_column_names
621
+
622
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
616
623
  output_df_columns = list(output_df_pd.columns)
617
624
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
618
625
  if self.sample_weight_col:
@@ -648,7 +648,14 @@ class FeatureAgglomeration(BaseTransformer):
648
648
  ) -> List[str]:
649
649
  # in case the inferred output column names dimension is different
650
650
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
651
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
651
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
652
+
653
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
654
+ # seen during the fit.
655
+ snowpark_column_names = dataset.select(self.input_cols).columns
656
+ sample_pd_df.columns = snowpark_column_names
657
+
658
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
652
659
  output_df_columns = list(output_df_pd.columns)
653
660
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
654
661
  if self.sample_weight_col:
@@ -645,7 +645,14 @@ class KMeans(BaseTransformer):
645
645
  ) -> List[str]:
646
646
  # in case the inferred output column names dimension is different
647
647
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
648
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
648
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
649
+
650
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
651
+ # seen during the fit.
652
+ snowpark_column_names = dataset.select(self.input_cols).columns
653
+ sample_pd_df.columns = snowpark_column_names
654
+
655
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
649
656
  output_df_columns = list(output_df_pd.columns)
650
657
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
651
658
  if self.sample_weight_col:
@@ -617,7 +617,14 @@ class MeanShift(BaseTransformer):
617
617
  ) -> List[str]:
618
618
  # in case the inferred output column names dimension is different
619
619
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
620
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
620
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
621
+
622
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
623
+ # seen during the fit.
624
+ snowpark_column_names = dataset.select(self.input_cols).columns
625
+ sample_pd_df.columns = snowpark_column_names
626
+
627
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
621
628
  output_df_columns = list(output_df_pd.columns)
622
629
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
623
630
  if self.sample_weight_col:
@@ -671,7 +671,14 @@ class MiniBatchKMeans(BaseTransformer):
671
671
  ) -> List[str]:
672
672
  # in case the inferred output column names dimension is different
673
673
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
674
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
674
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
675
+
676
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
677
+ # seen during the fit.
678
+ snowpark_column_names = dataset.select(self.input_cols).columns
679
+ sample_pd_df.columns = snowpark_column_names
680
+
681
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
675
682
  output_df_columns = list(output_df_pd.columns)
676
683
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
677
684
  if self.sample_weight_col:
@@ -685,7 +685,14 @@ class OPTICS(BaseTransformer):
685
685
  ) -> List[str]:
686
686
  # in case the inferred output column names dimension is different
687
687
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
688
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
688
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
689
+
690
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
691
+ # seen during the fit.
692
+ snowpark_column_names = dataset.select(self.input_cols).columns
693
+ sample_pd_df.columns = snowpark_column_names
694
+
695
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
689
696
  output_df_columns = list(output_df_pd.columns)
690
697
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
691
698
  if self.sample_weight_col:
@@ -621,7 +621,14 @@ class SpectralBiclustering(BaseTransformer):
621
621
  ) -> List[str]:
622
622
  # in case the inferred output column names dimension is different
623
623
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
624
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
624
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
625
+
626
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
627
+ # seen during the fit.
628
+ snowpark_column_names = dataset.select(self.input_cols).columns
629
+ sample_pd_df.columns = snowpark_column_names
630
+
631
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
625
632
  output_df_columns = list(output_df_pd.columns)
626
633
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
627
634
  if self.sample_weight_col:
@@ -681,7 +681,14 @@ class SpectralClustering(BaseTransformer):
681
681
  ) -> List[str]:
682
682
  # in case the inferred output column names dimension is different
683
683
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
684
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
684
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
685
+
686
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
687
+ # seen during the fit.
688
+ snowpark_column_names = dataset.select(self.input_cols).columns
689
+ sample_pd_df.columns = snowpark_column_names
690
+
691
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
685
692
  output_df_columns = list(output_df_pd.columns)
686
693
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
687
694
  if self.sample_weight_col:
@@ -600,7 +600,14 @@ class SpectralCoclustering(BaseTransformer):
600
600
  ) -> List[str]:
601
601
  # in case the inferred output column names dimension is different
602
602
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
603
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
603
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
604
+
605
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
606
+ # seen during the fit.
607
+ snowpark_column_names = dataset.select(self.input_cols).columns
608
+ sample_pd_df.columns = snowpark_column_names
609
+
610
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
604
611
  output_df_columns = list(output_df_pd.columns)
605
612
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
606
613
  if self.sample_weight_col:
@@ -634,7 +634,14 @@ class ColumnTransformer(BaseTransformer):
634
634
  ) -> List[str]:
635
635
  # in case the inferred output column names dimension is different
636
636
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
637
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
637
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
638
+
639
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
640
+ # seen during the fit.
641
+ snowpark_column_names = dataset.select(self.input_cols).columns
642
+ sample_pd_df.columns = snowpark_column_names
643
+
644
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
638
645
  output_df_columns = list(output_df_pd.columns)
639
646
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
640
647
  if self.sample_weight_col:
@@ -593,7 +593,14 @@ class TransformedTargetRegressor(BaseTransformer):
593
593
  ) -> List[str]:
594
594
  # in case the inferred output column names dimension is different
595
595
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
596
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
596
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
597
+
598
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
599
+ # seen during the fit.
600
+ snowpark_column_names = dataset.select(self.input_cols).columns
601
+ sample_pd_df.columns = snowpark_column_names
602
+
603
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
597
604
  output_df_columns = list(output_df_pd.columns)
598
605
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
599
606
  if self.sample_weight_col:
@@ -590,7 +590,14 @@ class EllipticEnvelope(BaseTransformer):
590
590
  ) -> List[str]:
591
591
  # in case the inferred output column names dimension is different
592
592
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
593
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
593
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
594
+
595
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
596
+ # seen during the fit.
597
+ snowpark_column_names = dataset.select(self.input_cols).columns
598
+ sample_pd_df.columns = snowpark_column_names
599
+
600
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
594
601
  output_df_columns = list(output_df_pd.columns)
595
602
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
596
603
  if self.sample_weight_col:
@@ -562,7 +562,14 @@ class EmpiricalCovariance(BaseTransformer):
562
562
  ) -> List[str]:
563
563
  # in case the inferred output column names dimension is different
564
564
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
565
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
565
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
566
+
567
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
568
+ # seen during the fit.
569
+ snowpark_column_names = dataset.select(self.input_cols).columns
570
+ sample_pd_df.columns = snowpark_column_names
571
+
572
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
566
573
  output_df_columns = list(output_df_pd.columns)
567
574
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
568
575
  if self.sample_weight_col:
@@ -610,7 +610,14 @@ class GraphicalLasso(BaseTransformer):
610
610
  ) -> List[str]:
611
611
  # in case the inferred output column names dimension is different
612
612
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
613
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
613
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
614
+
615
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
616
+ # seen during the fit.
617
+ snowpark_column_names = dataset.select(self.input_cols).columns
618
+ sample_pd_df.columns = snowpark_column_names
619
+
620
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
614
621
  output_df_columns = list(output_df_pd.columns)
615
622
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
616
623
  if self.sample_weight_col:
@@ -636,7 +636,14 @@ class GraphicalLassoCV(BaseTransformer):
636
636
  ) -> List[str]:
637
637
  # in case the inferred output column names dimension is different
638
638
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
639
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
639
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
640
+
641
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
642
+ # seen during the fit.
643
+ snowpark_column_names = dataset.select(self.input_cols).columns
644
+ sample_pd_df.columns = snowpark_column_names
645
+
646
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
640
647
  output_df_columns = list(output_df_pd.columns)
641
648
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
642
649
  if self.sample_weight_col:
@@ -569,7 +569,14 @@ class LedoitWolf(BaseTransformer):
569
569
  ) -> List[str]:
570
570
  # in case the inferred output column names dimension is different
571
571
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
572
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
572
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
573
+
574
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
575
+ # seen during the fit.
576
+ snowpark_column_names = dataset.select(self.input_cols).columns
577
+ sample_pd_df.columns = snowpark_column_names
578
+
579
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
573
580
  output_df_columns = list(output_df_pd.columns)
574
581
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
575
582
  if self.sample_weight_col:
@@ -581,7 +581,14 @@ class MinCovDet(BaseTransformer):
581
581
  ) -> List[str]:
582
582
  # in case the inferred output column names dimension is different
583
583
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
584
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
584
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
585
+
586
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
587
+ # seen during the fit.
588
+ snowpark_column_names = dataset.select(self.input_cols).columns
589
+ sample_pd_df.columns = snowpark_column_names
590
+
591
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
585
592
  output_df_columns = list(output_df_pd.columns)
586
593
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
587
594
  if self.sample_weight_col:
@@ -562,7 +562,14 @@ class OAS(BaseTransformer):
562
562
  ) -> List[str]:
563
563
  # in case the inferred output column names dimension is different
564
564
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
565
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
565
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
566
+
567
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
568
+ # seen during the fit.
569
+ snowpark_column_names = dataset.select(self.input_cols).columns
570
+ sample_pd_df.columns = snowpark_column_names
571
+
572
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
566
573
  output_df_columns = list(output_df_pd.columns)
567
574
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
568
575
  if self.sample_weight_col:
@@ -568,7 +568,14 @@ class ShrunkCovariance(BaseTransformer):
568
568
  ) -> List[str]:
569
569
  # in case the inferred output column names dimension is different
570
570
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
571
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
571
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
572
+
573
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
574
+ # seen during the fit.
575
+ snowpark_column_names = dataset.select(self.input_cols).columns
576
+ sample_pd_df.columns = snowpark_column_names
577
+
578
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
572
579
  output_df_columns = list(output_df_pd.columns)
573
580
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
574
581
  if self.sample_weight_col:
@@ -678,7 +678,14 @@ class DictionaryLearning(BaseTransformer):
678
678
  ) -> List[str]:
679
679
  # in case the inferred output column names dimension is different
680
680
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
681
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
681
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
682
+
683
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
684
+ # seen during the fit.
685
+ snowpark_column_names = dataset.select(self.input_cols).columns
686
+ sample_pd_df.columns = snowpark_column_names
687
+
688
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
682
689
  output_df_columns = list(output_df_pd.columns)
683
690
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
684
691
  if self.sample_weight_col:
@@ -615,7 +615,14 @@ class FactorAnalysis(BaseTransformer):
615
615
  ) -> List[str]:
616
616
  # in case the inferred output column names dimension is different
617
617
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
618
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
618
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
619
+
620
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
621
+ # seen during the fit.
622
+ snowpark_column_names = dataset.select(self.input_cols).columns
623
+ sample_pd_df.columns = snowpark_column_names
624
+
625
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
619
626
  output_df_columns = list(output_df_pd.columns)
620
627
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
621
628
  if self.sample_weight_col:
@@ -633,7 +633,14 @@ class FastICA(BaseTransformer):
633
633
  ) -> List[str]:
634
634
  # in case the inferred output column names dimension is different
635
635
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
636
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
636
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
637
+
638
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
639
+ # seen during the fit.
640
+ snowpark_column_names = dataset.select(self.input_cols).columns
641
+ sample_pd_df.columns = snowpark_column_names
642
+
643
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
637
644
  output_df_columns = list(output_df_pd.columns)
638
645
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
639
646
  if self.sample_weight_col:
@@ -585,7 +585,14 @@ class IncrementalPCA(BaseTransformer):
585
585
  ) -> List[str]:
586
586
  # in case the inferred output column names dimension is different
587
587
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
588
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
588
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
589
+
590
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
591
+ # seen during the fit.
592
+ snowpark_column_names = dataset.select(self.input_cols).columns
593
+ sample_pd_df.columns = snowpark_column_names
594
+
595
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
589
596
  output_df_columns = list(output_df_pd.columns)
590
597
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
591
598
  if self.sample_weight_col:
@@ -681,7 +681,14 @@ class KernelPCA(BaseTransformer):
681
681
  ) -> List[str]:
682
682
  # in case the inferred output column names dimension is different
683
683
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
684
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
684
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
685
+
686
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
687
+ # seen during the fit.
688
+ snowpark_column_names = dataset.select(self.input_cols).columns
689
+ sample_pd_df.columns = snowpark_column_names
690
+
691
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
685
692
  output_df_columns = list(output_df_pd.columns)
686
693
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
687
694
  if self.sample_weight_col:
@@ -703,7 +703,14 @@ class MiniBatchDictionaryLearning(BaseTransformer):
703
703
  ) -> List[str]:
704
704
  # in case the inferred output column names dimension is different
705
705
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
706
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
706
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
707
+
708
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
709
+ # seen during the fit.
710
+ snowpark_column_names = dataset.select(self.input_cols).columns
711
+ sample_pd_df.columns = snowpark_column_names
712
+
713
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
707
714
  output_df_columns = list(output_df_pd.columns)
708
715
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
709
716
  if self.sample_weight_col:
@@ -648,7 +648,14 @@ class MiniBatchSparsePCA(BaseTransformer):
648
648
  ) -> List[str]:
649
649
  # in case the inferred output column names dimension is different
650
650
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
651
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
651
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
652
+
653
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
654
+ # seen during the fit.
655
+ snowpark_column_names = dataset.select(self.input_cols).columns
656
+ sample_pd_df.columns = snowpark_column_names
657
+
658
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
652
659
  output_df_columns = list(output_df_pd.columns)
653
660
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
654
661
  if self.sample_weight_col:
@@ -650,7 +650,14 @@ class PCA(BaseTransformer):
650
650
  ) -> List[str]:
651
651
  # in case the inferred output column names dimension is different
652
652
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
653
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
653
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
654
+
655
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
656
+ # seen during the fit.
657
+ snowpark_column_names = dataset.select(self.input_cols).columns
658
+ sample_pd_df.columns = snowpark_column_names
659
+
660
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
654
661
  output_df_columns = list(output_df_pd.columns)
655
662
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
656
663
  if self.sample_weight_col:
@@ -623,7 +623,14 @@ class SparsePCA(BaseTransformer):
623
623
  ) -> List[str]:
624
624
  # in case the inferred output column names dimension is different
625
625
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
626
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
626
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
627
+
628
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
629
+ # seen during the fit.
630
+ snowpark_column_names = dataset.select(self.input_cols).columns
631
+ sample_pd_df.columns = snowpark_column_names
632
+
633
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
627
634
  output_df_columns = list(output_df_pd.columns)
628
635
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
629
636
  if self.sample_weight_col:
@@ -604,7 +604,14 @@ class TruncatedSVD(BaseTransformer):
604
604
  ) -> List[str]:
605
605
  # in case the inferred output column names dimension is different
606
606
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
607
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
607
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
608
+
609
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
610
+ # seen during the fit.
611
+ snowpark_column_names = dataset.select(self.input_cols).columns
612
+ sample_pd_df.columns = snowpark_column_names
613
+
614
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
608
615
  output_df_columns = list(output_df_pd.columns)
609
616
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
610
617
  if self.sample_weight_col:
@@ -623,7 +623,14 @@ class LinearDiscriminantAnalysis(BaseTransformer):
623
623
  ) -> List[str]:
624
624
  # in case the inferred output column names dimension is different
625
625
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
626
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
626
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
627
+
628
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
629
+ # seen during the fit.
630
+ snowpark_column_names = dataset.select(self.input_cols).columns
631
+ sample_pd_df.columns = snowpark_column_names
632
+
633
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
627
634
  output_df_columns = list(output_df_pd.columns)
628
635
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
629
636
  if self.sample_weight_col:
@@ -581,7 +581,14 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
581
581
  ) -> List[str]:
582
582
  # in case the inferred output column names dimension is different
583
583
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
584
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
584
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
585
+
586
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
587
+ # seen during the fit.
588
+ snowpark_column_names = dataset.select(self.input_cols).columns
589
+ sample_pd_df.columns = snowpark_column_names
590
+
591
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
585
592
  output_df_columns = list(output_df_pd.columns)
586
593
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
587
594
  if self.sample_weight_col:
@@ -606,7 +606,14 @@ class AdaBoostClassifier(BaseTransformer):
606
606
  ) -> List[str]:
607
607
  # in case the inferred output column names dimension is different
608
608
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
609
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
609
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
610
+
611
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
612
+ # seen during the fit.
613
+ snowpark_column_names = dataset.select(self.input_cols).columns
614
+ sample_pd_df.columns = snowpark_column_names
615
+
616
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
610
617
  output_df_columns = list(output_df_pd.columns)
611
618
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
612
619
  if self.sample_weight_col:
@@ -603,7 +603,14 @@ class AdaBoostRegressor(BaseTransformer):
603
603
  ) -> List[str]:
604
604
  # in case the inferred output column names dimension is different
605
605
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
606
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
606
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
607
+
608
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
609
+ # seen during the fit.
610
+ snowpark_column_names = dataset.select(self.input_cols).columns
611
+ sample_pd_df.columns = snowpark_column_names
612
+
613
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
607
614
  output_df_columns = list(output_df_pd.columns)
608
615
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
609
616
  if self.sample_weight_col: