snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.2__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. snowflake/cortex/_sentiment.py +7 -4
  2. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  3. snowflake/ml/feature_store/access_manager.py +34 -30
  4. snowflake/ml/feature_store/feature_store.py +1 -1
  5. snowflake/ml/feature_store/feature_view.py +12 -11
  6. snowflake/ml/fileset/snowfs.py +2 -31
  7. snowflake/ml/model/_client/ops/model_ops.py +43 -0
  8. snowflake/ml/model/_client/sql/model_version.py +53 -1
  9. snowflake/ml/model/_model_composer/model_composer.py +6 -2
  10. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  11. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  12. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +58 -139
  13. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
  14. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
  15. snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
  16. snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
  17. snowflake/ml/modeling/cluster/birch.py +8 -1
  18. snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
  19. snowflake/ml/modeling/cluster/dbscan.py +8 -1
  20. snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
  21. snowflake/ml/modeling/cluster/k_means.py +8 -1
  22. snowflake/ml/modeling/cluster/mean_shift.py +8 -1
  23. snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
  24. snowflake/ml/modeling/cluster/optics.py +8 -1
  25. snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
  26. snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
  27. snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
  28. snowflake/ml/modeling/compose/column_transformer.py +8 -1
  29. snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
  30. snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
  31. snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
  32. snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
  33. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
  34. snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
  35. snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
  36. snowflake/ml/modeling/covariance/oas.py +8 -1
  37. snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
  38. snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
  39. snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
  40. snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
  41. snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
  42. snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
  43. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
  44. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
  45. snowflake/ml/modeling/decomposition/pca.py +8 -1
  46. snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
  47. snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
  48. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
  49. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
  50. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
  51. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
  52. snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
  53. snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
  54. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
  55. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
  56. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
  57. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
  58. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
  59. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
  60. snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
  61. snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
  62. snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
  63. snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
  64. snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
  65. snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
  66. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
  67. snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
  68. snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
  69. snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
  70. snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
  71. snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
  72. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
  73. snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
  74. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
  75. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
  76. snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
  77. snowflake/ml/modeling/impute/knn_imputer.py +8 -1
  78. snowflake/ml/modeling/impute/missing_indicator.py +8 -1
  79. snowflake/ml/modeling/impute/simple_imputer.py +21 -2
  80. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
  81. snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
  82. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
  83. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
  84. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
  85. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
  86. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
  87. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
  88. snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
  89. snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
  90. snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
  91. snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
  92. snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
  93. snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
  94. snowflake/ml/modeling/linear_model/lars.py +8 -1
  95. snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
  96. snowflake/ml/modeling/linear_model/lasso.py +8 -1
  97. snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
  98. snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
  99. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
  100. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
  101. snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
  102. snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
  103. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
  104. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
  105. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
  106. snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
  107. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
  108. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
  109. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
  110. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
  111. snowflake/ml/modeling/linear_model/perceptron.py +8 -1
  112. snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
  113. snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
  114. snowflake/ml/modeling/linear_model/ridge.py +8 -1
  115. snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
  116. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
  117. snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
  118. snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
  119. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
  120. snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
  121. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
  122. snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
  123. snowflake/ml/modeling/manifold/isomap.py +8 -1
  124. snowflake/ml/modeling/manifold/mds.py +8 -1
  125. snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
  126. snowflake/ml/modeling/manifold/tsne.py +8 -1
  127. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
  128. snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
  129. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
  130. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
  131. snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
  132. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
  133. snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
  134. snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
  135. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
  136. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
  137. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
  138. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
  139. snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
  140. snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
  141. snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
  142. snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
  143. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
  144. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
  145. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
  146. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
  147. snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
  148. snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
  149. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  150. snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
  151. snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
  152. snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
  153. snowflake/ml/modeling/svm/linear_svc.py +8 -1
  154. snowflake/ml/modeling/svm/linear_svr.py +8 -1
  155. snowflake/ml/modeling/svm/nu_svc.py +8 -1
  156. snowflake/ml/modeling/svm/nu_svr.py +8 -1
  157. snowflake/ml/modeling/svm/svc.py +8 -1
  158. snowflake/ml/modeling/svm/svr.py +8 -1
  159. snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
  160. snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
  161. snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
  162. snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
  163. snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
  164. snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
  165. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
  166. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
  167. snowflake/ml/registry/_manager/model_manager.py +59 -1
  168. snowflake/ml/registry/registry.py +10 -1
  169. snowflake/ml/version.py +1 -1
  170. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +13 -1
  171. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +174 -172
  172. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
  173. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
  174. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -600,7 +600,14 @@ class SpectralCoclustering(BaseTransformer):
600
600
  ) -> List[str]:
601
601
  # in case the inferred output column names dimension is different
602
602
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
603
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
603
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
604
+
605
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
606
+ # seen during the fit.
607
+ snowpark_column_names = dataset.select(self.input_cols).columns
608
+ sample_pd_df.columns = snowpark_column_names
609
+
610
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
604
611
  output_df_columns = list(output_df_pd.columns)
605
612
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
606
613
  if self.sample_weight_col:
@@ -634,7 +634,14 @@ class ColumnTransformer(BaseTransformer):
634
634
  ) -> List[str]:
635
635
  # in case the inferred output column names dimension is different
636
636
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
637
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
637
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
638
+
639
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
640
+ # seen during the fit.
641
+ snowpark_column_names = dataset.select(self.input_cols).columns
642
+ sample_pd_df.columns = snowpark_column_names
643
+
644
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
638
645
  output_df_columns = list(output_df_pd.columns)
639
646
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
640
647
  if self.sample_weight_col:
@@ -593,7 +593,14 @@ class TransformedTargetRegressor(BaseTransformer):
593
593
  ) -> List[str]:
594
594
  # in case the inferred output column names dimension is different
595
595
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
596
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
596
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
597
+
598
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
599
+ # seen during the fit.
600
+ snowpark_column_names = dataset.select(self.input_cols).columns
601
+ sample_pd_df.columns = snowpark_column_names
602
+
603
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
597
604
  output_df_columns = list(output_df_pd.columns)
598
605
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
599
606
  if self.sample_weight_col:
@@ -590,7 +590,14 @@ class EllipticEnvelope(BaseTransformer):
590
590
  ) -> List[str]:
591
591
  # in case the inferred output column names dimension is different
592
592
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
593
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
593
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
594
+
595
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
596
+ # seen during the fit.
597
+ snowpark_column_names = dataset.select(self.input_cols).columns
598
+ sample_pd_df.columns = snowpark_column_names
599
+
600
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
594
601
  output_df_columns = list(output_df_pd.columns)
595
602
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
596
603
  if self.sample_weight_col:
@@ -562,7 +562,14 @@ class EmpiricalCovariance(BaseTransformer):
562
562
  ) -> List[str]:
563
563
  # in case the inferred output column names dimension is different
564
564
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
565
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
565
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
566
+
567
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
568
+ # seen during the fit.
569
+ snowpark_column_names = dataset.select(self.input_cols).columns
570
+ sample_pd_df.columns = snowpark_column_names
571
+
572
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
566
573
  output_df_columns = list(output_df_pd.columns)
567
574
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
568
575
  if self.sample_weight_col:
@@ -610,7 +610,14 @@ class GraphicalLasso(BaseTransformer):
610
610
  ) -> List[str]:
611
611
  # in case the inferred output column names dimension is different
612
612
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
613
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
613
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
614
+
615
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
616
+ # seen during the fit.
617
+ snowpark_column_names = dataset.select(self.input_cols).columns
618
+ sample_pd_df.columns = snowpark_column_names
619
+
620
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
614
621
  output_df_columns = list(output_df_pd.columns)
615
622
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
616
623
  if self.sample_weight_col:
@@ -636,7 +636,14 @@ class GraphicalLassoCV(BaseTransformer):
636
636
  ) -> List[str]:
637
637
  # in case the inferred output column names dimension is different
638
638
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
639
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
639
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
640
+
641
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
642
+ # seen during the fit.
643
+ snowpark_column_names = dataset.select(self.input_cols).columns
644
+ sample_pd_df.columns = snowpark_column_names
645
+
646
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
640
647
  output_df_columns = list(output_df_pd.columns)
641
648
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
642
649
  if self.sample_weight_col:
@@ -569,7 +569,14 @@ class LedoitWolf(BaseTransformer):
569
569
  ) -> List[str]:
570
570
  # in case the inferred output column names dimension is different
571
571
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
572
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
572
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
573
+
574
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
575
+ # seen during the fit.
576
+ snowpark_column_names = dataset.select(self.input_cols).columns
577
+ sample_pd_df.columns = snowpark_column_names
578
+
579
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
573
580
  output_df_columns = list(output_df_pd.columns)
574
581
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
575
582
  if self.sample_weight_col:
@@ -581,7 +581,14 @@ class MinCovDet(BaseTransformer):
581
581
  ) -> List[str]:
582
582
  # in case the inferred output column names dimension is different
583
583
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
584
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
584
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
585
+
586
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
587
+ # seen during the fit.
588
+ snowpark_column_names = dataset.select(self.input_cols).columns
589
+ sample_pd_df.columns = snowpark_column_names
590
+
591
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
585
592
  output_df_columns = list(output_df_pd.columns)
586
593
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
587
594
  if self.sample_weight_col:
@@ -562,7 +562,14 @@ class OAS(BaseTransformer):
562
562
  ) -> List[str]:
563
563
  # in case the inferred output column names dimension is different
564
564
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
565
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
565
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
566
+
567
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
568
+ # seen during the fit.
569
+ snowpark_column_names = dataset.select(self.input_cols).columns
570
+ sample_pd_df.columns = snowpark_column_names
571
+
572
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
566
573
  output_df_columns = list(output_df_pd.columns)
567
574
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
568
575
  if self.sample_weight_col:
@@ -568,7 +568,14 @@ class ShrunkCovariance(BaseTransformer):
568
568
  ) -> List[str]:
569
569
  # in case the inferred output column names dimension is different
570
570
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
571
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
571
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
572
+
573
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
574
+ # seen during the fit.
575
+ snowpark_column_names = dataset.select(self.input_cols).columns
576
+ sample_pd_df.columns = snowpark_column_names
577
+
578
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
572
579
  output_df_columns = list(output_df_pd.columns)
573
580
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
574
581
  if self.sample_weight_col:
@@ -678,7 +678,14 @@ class DictionaryLearning(BaseTransformer):
678
678
  ) -> List[str]:
679
679
  # in case the inferred output column names dimension is different
680
680
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
681
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
681
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
682
+
683
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
684
+ # seen during the fit.
685
+ snowpark_column_names = dataset.select(self.input_cols).columns
686
+ sample_pd_df.columns = snowpark_column_names
687
+
688
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
682
689
  output_df_columns = list(output_df_pd.columns)
683
690
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
684
691
  if self.sample_weight_col:
@@ -615,7 +615,14 @@ class FactorAnalysis(BaseTransformer):
615
615
  ) -> List[str]:
616
616
  # in case the inferred output column names dimension is different
617
617
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
618
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
618
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
619
+
620
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
621
+ # seen during the fit.
622
+ snowpark_column_names = dataset.select(self.input_cols).columns
623
+ sample_pd_df.columns = snowpark_column_names
624
+
625
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
619
626
  output_df_columns = list(output_df_pd.columns)
620
627
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
621
628
  if self.sample_weight_col:
@@ -633,7 +633,14 @@ class FastICA(BaseTransformer):
633
633
  ) -> List[str]:
634
634
  # in case the inferred output column names dimension is different
635
635
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
636
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
636
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
637
+
638
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
639
+ # seen during the fit.
640
+ snowpark_column_names = dataset.select(self.input_cols).columns
641
+ sample_pd_df.columns = snowpark_column_names
642
+
643
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
637
644
  output_df_columns = list(output_df_pd.columns)
638
645
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
639
646
  if self.sample_weight_col:
@@ -585,7 +585,14 @@ class IncrementalPCA(BaseTransformer):
585
585
  ) -> List[str]:
586
586
  # in case the inferred output column names dimension is different
587
587
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
588
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
588
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
589
+
590
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
591
+ # seen during the fit.
592
+ snowpark_column_names = dataset.select(self.input_cols).columns
593
+ sample_pd_df.columns = snowpark_column_names
594
+
595
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
589
596
  output_df_columns = list(output_df_pd.columns)
590
597
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
591
598
  if self.sample_weight_col:
@@ -681,7 +681,14 @@ class KernelPCA(BaseTransformer):
681
681
  ) -> List[str]:
682
682
  # in case the inferred output column names dimension is different
683
683
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
684
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
684
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
685
+
686
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
687
+ # seen during the fit.
688
+ snowpark_column_names = dataset.select(self.input_cols).columns
689
+ sample_pd_df.columns = snowpark_column_names
690
+
691
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
685
692
  output_df_columns = list(output_df_pd.columns)
686
693
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
687
694
  if self.sample_weight_col:
@@ -703,7 +703,14 @@ class MiniBatchDictionaryLearning(BaseTransformer):
703
703
  ) -> List[str]:
704
704
  # in case the inferred output column names dimension is different
705
705
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
706
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
706
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
707
+
708
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
709
+ # seen during the fit.
710
+ snowpark_column_names = dataset.select(self.input_cols).columns
711
+ sample_pd_df.columns = snowpark_column_names
712
+
713
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
707
714
  output_df_columns = list(output_df_pd.columns)
708
715
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
709
716
  if self.sample_weight_col:
@@ -648,7 +648,14 @@ class MiniBatchSparsePCA(BaseTransformer):
648
648
  ) -> List[str]:
649
649
  # in case the inferred output column names dimension is different
650
650
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
651
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
651
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
652
+
653
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
654
+ # seen during the fit.
655
+ snowpark_column_names = dataset.select(self.input_cols).columns
656
+ sample_pd_df.columns = snowpark_column_names
657
+
658
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
652
659
  output_df_columns = list(output_df_pd.columns)
653
660
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
654
661
  if self.sample_weight_col:
@@ -650,7 +650,14 @@ class PCA(BaseTransformer):
650
650
  ) -> List[str]:
651
651
  # in case the inferred output column names dimension is different
652
652
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
653
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
653
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
654
+
655
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
656
+ # seen during the fit.
657
+ snowpark_column_names = dataset.select(self.input_cols).columns
658
+ sample_pd_df.columns = snowpark_column_names
659
+
660
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
654
661
  output_df_columns = list(output_df_pd.columns)
655
662
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
656
663
  if self.sample_weight_col:
@@ -623,7 +623,14 @@ class SparsePCA(BaseTransformer):
623
623
  ) -> List[str]:
624
624
  # in case the inferred output column names dimension is different
625
625
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
626
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
626
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
627
+
628
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
629
+ # seen during the fit.
630
+ snowpark_column_names = dataset.select(self.input_cols).columns
631
+ sample_pd_df.columns = snowpark_column_names
632
+
633
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
627
634
  output_df_columns = list(output_df_pd.columns)
628
635
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
629
636
  if self.sample_weight_col:
@@ -604,7 +604,14 @@ class TruncatedSVD(BaseTransformer):
604
604
  ) -> List[str]:
605
605
  # in case the inferred output column names dimension is different
606
606
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
607
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
607
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
608
+
609
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
610
+ # seen during the fit.
611
+ snowpark_column_names = dataset.select(self.input_cols).columns
612
+ sample_pd_df.columns = snowpark_column_names
613
+
614
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
608
615
  output_df_columns = list(output_df_pd.columns)
609
616
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
610
617
  if self.sample_weight_col:
@@ -623,7 +623,14 @@ class LinearDiscriminantAnalysis(BaseTransformer):
623
623
  ) -> List[str]:
624
624
  # in case the inferred output column names dimension is different
625
625
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
626
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
626
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
627
+
628
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
629
+ # seen during the fit.
630
+ snowpark_column_names = dataset.select(self.input_cols).columns
631
+ sample_pd_df.columns = snowpark_column_names
632
+
633
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
627
634
  output_df_columns = list(output_df_pd.columns)
628
635
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
629
636
  if self.sample_weight_col:
@@ -581,7 +581,14 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
581
581
  ) -> List[str]:
582
582
  # in case the inferred output column names dimension is different
583
583
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
584
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
584
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
585
+
586
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
587
+ # seen during the fit.
588
+ snowpark_column_names = dataset.select(self.input_cols).columns
589
+ sample_pd_df.columns = snowpark_column_names
590
+
591
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
585
592
  output_df_columns = list(output_df_pd.columns)
586
593
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
587
594
  if self.sample_weight_col:
@@ -606,7 +606,14 @@ class AdaBoostClassifier(BaseTransformer):
606
606
  ) -> List[str]:
607
607
  # in case the inferred output column names dimension is different
608
608
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
609
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
609
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
610
+
611
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
612
+ # seen during the fit.
613
+ snowpark_column_names = dataset.select(self.input_cols).columns
614
+ sample_pd_df.columns = snowpark_column_names
615
+
616
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
610
617
  output_df_columns = list(output_df_pd.columns)
611
618
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
612
619
  if self.sample_weight_col:
@@ -603,7 +603,14 @@ class AdaBoostRegressor(BaseTransformer):
603
603
  ) -> List[str]:
604
604
  # in case the inferred output column names dimension is different
605
605
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
606
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
606
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
607
+
608
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
609
+ # seen during the fit.
610
+ snowpark_column_names = dataset.select(self.input_cols).columns
611
+ sample_pd_df.columns = snowpark_column_names
612
+
613
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
607
614
  output_df_columns = list(output_df_pd.columns)
608
615
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
609
616
  if self.sample_weight_col:
@@ -638,7 +638,14 @@ class BaggingClassifier(BaseTransformer):
638
638
  ) -> List[str]:
639
639
  # in case the inferred output column names dimension is different
640
640
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
641
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
641
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
642
+
643
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
644
+ # seen during the fit.
645
+ snowpark_column_names = dataset.select(self.input_cols).columns
646
+ sample_pd_df.columns = snowpark_column_names
647
+
648
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
642
649
  output_df_columns = list(output_df_pd.columns)
643
650
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
644
651
  if self.sample_weight_col:
@@ -638,7 +638,14 @@ class BaggingRegressor(BaseTransformer):
638
638
  ) -> List[str]:
639
639
  # in case the inferred output column names dimension is different
640
640
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
641
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
641
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
642
+
643
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
644
+ # seen during the fit.
645
+ snowpark_column_names = dataset.select(self.input_cols).columns
646
+ sample_pd_df.columns = snowpark_column_names
647
+
648
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
642
649
  output_df_columns = list(output_df_pd.columns)
643
650
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
644
651
  if self.sample_weight_col:
@@ -741,7 +741,14 @@ class ExtraTreesClassifier(BaseTransformer):
741
741
  ) -> List[str]:
742
742
  # in case the inferred output column names dimension is different
743
743
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
744
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
744
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
745
+
746
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
747
+ # seen during the fit.
748
+ snowpark_column_names = dataset.select(self.input_cols).columns
749
+ sample_pd_df.columns = snowpark_column_names
750
+
751
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
745
752
  output_df_columns = list(output_df_pd.columns)
746
753
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
747
754
  if self.sample_weight_col:
@@ -720,7 +720,14 @@ class ExtraTreesRegressor(BaseTransformer):
720
720
  ) -> List[str]:
721
721
  # in case the inferred output column names dimension is different
722
722
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
723
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
723
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
724
+
725
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
726
+ # seen during the fit.
727
+ snowpark_column_names = dataset.select(self.input_cols).columns
728
+ sample_pd_df.columns = snowpark_column_names
729
+
730
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
724
731
  output_df_columns = list(output_df_pd.columns)
725
732
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
726
733
  if self.sample_weight_col:
@@ -753,7 +753,14 @@ class GradientBoostingClassifier(BaseTransformer):
753
753
  ) -> List[str]:
754
754
  # in case the inferred output column names dimension is different
755
755
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
756
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
756
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
757
+
758
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
759
+ # seen during the fit.
760
+ snowpark_column_names = dataset.select(self.input_cols).columns
761
+ sample_pd_df.columns = snowpark_column_names
762
+
763
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
757
764
  output_df_columns = list(output_df_pd.columns)
758
765
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
759
766
  if self.sample_weight_col:
@@ -762,7 +762,14 @@ class GradientBoostingRegressor(BaseTransformer):
762
762
  ) -> List[str]:
763
763
  # in case the inferred output column names dimension is different
764
764
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
765
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
765
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
766
+
767
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
768
+ # seen during the fit.
769
+ snowpark_column_names = dataset.select(self.input_cols).columns
770
+ sample_pd_df.columns = snowpark_column_names
771
+
772
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
766
773
  output_df_columns = list(output_df_pd.columns)
767
774
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
768
775
  if self.sample_weight_col:
@@ -734,7 +734,14 @@ class HistGradientBoostingClassifier(BaseTransformer):
734
734
  ) -> List[str]:
735
735
  # in case the inferred output column names dimension is different
736
736
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
737
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
737
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
738
+
739
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
740
+ # seen during the fit.
741
+ snowpark_column_names = dataset.select(self.input_cols).columns
742
+ sample_pd_df.columns = snowpark_column_names
743
+
744
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
738
745
  output_df_columns = list(output_df_pd.columns)
739
746
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
740
747
  if self.sample_weight_col:
@@ -725,7 +725,14 @@ class HistGradientBoostingRegressor(BaseTransformer):
725
725
  ) -> List[str]:
726
726
  # in case the inferred output column names dimension is different
727
727
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
728
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
728
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
729
+
730
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
731
+ # seen during the fit.
732
+ snowpark_column_names = dataset.select(self.input_cols).columns
733
+ sample_pd_df.columns = snowpark_column_names
734
+
735
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
729
736
  output_df_columns = list(output_df_pd.columns)
730
737
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
731
738
  if self.sample_weight_col:
@@ -627,7 +627,14 @@ class IsolationForest(BaseTransformer):
627
627
  ) -> List[str]:
628
628
  # in case the inferred output column names dimension is different
629
629
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
630
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
630
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
631
+
632
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
633
+ # seen during the fit.
634
+ snowpark_column_names = dataset.select(self.input_cols).columns
635
+ sample_pd_df.columns = snowpark_column_names
636
+
637
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
631
638
  output_df_columns = list(output_df_pd.columns)
632
639
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
633
640
  if self.sample_weight_col:
@@ -737,7 +737,14 @@ class RandomForestClassifier(BaseTransformer):
737
737
  ) -> List[str]:
738
738
  # in case the inferred output column names dimension is different
739
739
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
740
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
740
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
741
+
742
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
743
+ # seen during the fit.
744
+ snowpark_column_names = dataset.select(self.input_cols).columns
745
+ sample_pd_df.columns = snowpark_column_names
746
+
747
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
741
748
  output_df_columns = list(output_df_pd.columns)
742
749
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
743
750
  if self.sample_weight_col:
@@ -716,7 +716,14 @@ class RandomForestRegressor(BaseTransformer):
716
716
  ) -> List[str]:
717
717
  # in case the inferred output column names dimension is different
718
718
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
719
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
719
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
720
+
721
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
722
+ # seen during the fit.
723
+ snowpark_column_names = dataset.select(self.input_cols).columns
724
+ sample_pd_df.columns = snowpark_column_names
725
+
726
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
720
727
  output_df_columns = list(output_df_pd.columns)
721
728
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
722
729
  if self.sample_weight_col: