snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.2__py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (174)
  1. snowflake/cortex/_sentiment.py +7 -4
  2. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  3. snowflake/ml/feature_store/access_manager.py +34 -30
  4. snowflake/ml/feature_store/feature_store.py +1 -1
  5. snowflake/ml/feature_store/feature_view.py +12 -11
  6. snowflake/ml/fileset/snowfs.py +2 -31
  7. snowflake/ml/model/_client/ops/model_ops.py +43 -0
  8. snowflake/ml/model/_client/sql/model_version.py +53 -1
  9. snowflake/ml/model/_model_composer/model_composer.py +6 -2
  10. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  11. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  12. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +58 -139
  13. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
  14. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
  15. snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
  16. snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
  17. snowflake/ml/modeling/cluster/birch.py +8 -1
  18. snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
  19. snowflake/ml/modeling/cluster/dbscan.py +8 -1
  20. snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
  21. snowflake/ml/modeling/cluster/k_means.py +8 -1
  22. snowflake/ml/modeling/cluster/mean_shift.py +8 -1
  23. snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
  24. snowflake/ml/modeling/cluster/optics.py +8 -1
  25. snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
  26. snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
  27. snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
  28. snowflake/ml/modeling/compose/column_transformer.py +8 -1
  29. snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
  30. snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
  31. snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
  32. snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
  33. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
  34. snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
  35. snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
  36. snowflake/ml/modeling/covariance/oas.py +8 -1
  37. snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
  38. snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
  39. snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
  40. snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
  41. snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
  42. snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
  43. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
  44. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
  45. snowflake/ml/modeling/decomposition/pca.py +8 -1
  46. snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
  47. snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
  48. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
  49. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
  50. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
  51. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
  52. snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
  53. snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
  54. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
  55. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
  56. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
  57. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
  58. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
  59. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
  60. snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
  61. snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
  62. snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
  63. snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
  64. snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
  65. snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
  66. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
  67. snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
  68. snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
  69. snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
  70. snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
  71. snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
  72. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
  73. snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
  74. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
  75. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
  76. snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
  77. snowflake/ml/modeling/impute/knn_imputer.py +8 -1
  78. snowflake/ml/modeling/impute/missing_indicator.py +8 -1
  79. snowflake/ml/modeling/impute/simple_imputer.py +21 -2
  80. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
  81. snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
  82. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
  83. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
  84. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
  85. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
  86. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
  87. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
  88. snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
  89. snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
  90. snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
  91. snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
  92. snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
  93. snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
  94. snowflake/ml/modeling/linear_model/lars.py +8 -1
  95. snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
  96. snowflake/ml/modeling/linear_model/lasso.py +8 -1
  97. snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
  98. snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
  99. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
  100. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
  101. snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
  102. snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
  103. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
  104. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
  105. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
  106. snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
  107. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
  108. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
  109. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
  110. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
  111. snowflake/ml/modeling/linear_model/perceptron.py +8 -1
  112. snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
  113. snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
  114. snowflake/ml/modeling/linear_model/ridge.py +8 -1
  115. snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
  116. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
  117. snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
  118. snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
  119. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
  120. snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
  121. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
  122. snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
  123. snowflake/ml/modeling/manifold/isomap.py +8 -1
  124. snowflake/ml/modeling/manifold/mds.py +8 -1
  125. snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
  126. snowflake/ml/modeling/manifold/tsne.py +8 -1
  127. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
  128. snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
  129. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
  130. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
  131. snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
  132. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
  133. snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
  134. snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
  135. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
  136. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
  137. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
  138. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
  139. snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
  140. snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
  141. snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
  142. snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
  143. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
  144. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
  145. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
  146. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
  147. snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
  148. snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
  149. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  150. snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
  151. snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
  152. snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
  153. snowflake/ml/modeling/svm/linear_svc.py +8 -1
  154. snowflake/ml/modeling/svm/linear_svr.py +8 -1
  155. snowflake/ml/modeling/svm/nu_svc.py +8 -1
  156. snowflake/ml/modeling/svm/nu_svr.py +8 -1
  157. snowflake/ml/modeling/svm/svc.py +8 -1
  158. snowflake/ml/modeling/svm/svr.py +8 -1
  159. snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
  160. snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
  161. snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
  162. snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
  163. snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
  164. snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
  165. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
  166. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
  167. snowflake/ml/registry/_manager/model_manager.py +59 -1
  168. snowflake/ml/registry/registry.py +10 -1
  169. snowflake/ml/version.py +1 -1
  170. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +13 -1
  171. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +174 -172
  172. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
  173. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
  174. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -652,7 +652,14 @@ class LassoCV(BaseTransformer):
652
652
  ) -> List[str]:
653
653
  # in case the inferred output column names dimension is different
654
654
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
655
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
655
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
656
+
657
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
658
+ # seen during the fit.
659
+ snowpark_column_names = dataset.select(self.input_cols).columns
660
+ sample_pd_df.columns = snowpark_column_names
661
+
662
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
656
663
  output_df_columns = list(output_df_pd.columns)
657
664
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
658
665
  if self.sample_weight_col:
@@ -644,7 +644,14 @@ class LassoLars(BaseTransformer):
644
644
  ) -> List[str]:
645
645
  # in case the inferred output column names dimension is different
646
646
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
647
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
647
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
648
+
649
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
650
+ # seen during the fit.
651
+ snowpark_column_names = dataset.select(self.input_cols).columns
652
+ sample_pd_df.columns = snowpark_column_names
653
+
654
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
648
655
  output_df_columns = list(output_df_pd.columns)
649
656
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
650
657
  if self.sample_weight_col:
@@ -645,7 +645,14 @@ class LassoLarsCV(BaseTransformer):
645
645
  ) -> List[str]:
646
646
  # in case the inferred output column names dimension is different
647
647
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
648
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
648
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
649
+
650
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
651
+ # seen during the fit.
652
+ snowpark_column_names = dataset.select(self.input_cols).columns
653
+ sample_pd_df.columns = snowpark_column_names
654
+
655
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
649
656
  output_df_columns = list(output_df_pd.columns)
650
657
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
651
658
  if self.sample_weight_col:
@@ -628,7 +628,14 @@ class LassoLarsIC(BaseTransformer):
628
628
  ) -> List[str]:
629
629
  # in case the inferred output column names dimension is different
630
630
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
631
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
631
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
632
+
633
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
634
+ # seen during the fit.
635
+ snowpark_column_names = dataset.select(self.input_cols).columns
636
+ sample_pd_df.columns = snowpark_column_names
637
+
638
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
632
639
  output_df_columns = list(output_df_pd.columns)
633
640
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
634
641
  if self.sample_weight_col:
@@ -581,7 +581,14 @@ class LinearRegression(BaseTransformer):
581
581
  ) -> List[str]:
582
582
  # in case the inferred output column names dimension is different
583
583
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
584
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
584
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
585
+
586
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
587
+ # seen during the fit.
588
+ snowpark_column_names = dataset.select(self.input_cols).columns
589
+ sample_pd_df.columns = snowpark_column_names
590
+
591
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
585
592
  output_df_columns = list(output_df_pd.columns)
586
593
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
587
594
  if self.sample_weight_col:
@@ -695,7 +695,14 @@ class LogisticRegression(BaseTransformer):
695
695
  ) -> List[str]:
696
696
  # in case the inferred output column names dimension is different
697
697
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
698
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
698
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
699
+
700
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
701
+ # seen during the fit.
702
+ snowpark_column_names = dataset.select(self.input_cols).columns
703
+ sample_pd_df.columns = snowpark_column_names
704
+
705
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
699
706
  output_df_columns = list(output_df_pd.columns)
700
707
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
701
708
  if self.sample_weight_col:
@@ -716,7 +716,14 @@ class LogisticRegressionCV(BaseTransformer):
716
716
  ) -> List[str]:
717
717
  # in case the inferred output column names dimension is different
718
718
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
719
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
719
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
720
+
721
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
722
+ # seen during the fit.
723
+ snowpark_column_names = dataset.select(self.input_cols).columns
724
+ sample_pd_df.columns = snowpark_column_names
725
+
726
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
720
727
  output_df_columns = list(output_df_pd.columns)
721
728
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
722
729
  if self.sample_weight_col:
@@ -614,7 +614,14 @@ class MultiTaskElasticNet(BaseTransformer):
614
614
  ) -> List[str]:
615
615
  # in case the inferred output column names dimension is different
616
616
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
617
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
617
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
618
+
619
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
620
+ # seen during the fit.
621
+ snowpark_column_names = dataset.select(self.input_cols).columns
622
+ sample_pd_df.columns = snowpark_column_names
623
+
624
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
618
625
  output_df_columns = list(output_df_pd.columns)
619
626
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
620
627
  if self.sample_weight_col:
@@ -655,7 +655,14 @@ class MultiTaskElasticNetCV(BaseTransformer):
655
655
  ) -> List[str]:
656
656
  # in case the inferred output column names dimension is different
657
657
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
658
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
658
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
659
+
660
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
661
+ # seen during the fit.
662
+ snowpark_column_names = dataset.select(self.input_cols).columns
663
+ sample_pd_df.columns = snowpark_column_names
664
+
665
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
659
666
  output_df_columns = list(output_df_pd.columns)
660
667
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
661
668
  if self.sample_weight_col:
@@ -606,7 +606,14 @@ class MultiTaskLasso(BaseTransformer):
606
606
  ) -> List[str]:
607
607
  # in case the inferred output column names dimension is different
608
608
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
609
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
609
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
610
+
611
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
612
+ # seen during the fit.
613
+ snowpark_column_names = dataset.select(self.input_cols).columns
614
+ sample_pd_df.columns = snowpark_column_names
615
+
616
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
610
617
  output_df_columns = list(output_df_pd.columns)
611
618
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
612
619
  if self.sample_weight_col:
@@ -641,7 +641,14 @@ class MultiTaskLassoCV(BaseTransformer):
641
641
  ) -> List[str]:
642
642
  # in case the inferred output column names dimension is different
643
643
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
644
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
644
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
645
+
646
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
647
+ # seen during the fit.
648
+ snowpark_column_names = dataset.select(self.input_cols).columns
649
+ sample_pd_df.columns = snowpark_column_names
650
+
651
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
645
652
  output_df_columns = list(output_df_pd.columns)
646
653
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
647
654
  if self.sample_weight_col:
@@ -589,7 +589,14 @@ class OrthogonalMatchingPursuit(BaseTransformer):
589
589
  ) -> List[str]:
590
590
  # in case the inferred output column names dimension is different
591
591
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
592
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
592
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
593
+
594
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
595
+ # seen during the fit.
596
+ snowpark_column_names = dataset.select(self.input_cols).columns
597
+ sample_pd_df.columns = snowpark_column_names
598
+
599
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
593
600
  output_df_columns = list(output_df_pd.columns)
594
601
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
595
602
  if self.sample_weight_col:
@@ -663,7 +663,14 @@ class PassiveAggressiveClassifier(BaseTransformer):
663
663
  ) -> List[str]:
664
664
  # in case the inferred output column names dimension is different
665
665
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
666
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
666
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
667
+
668
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
669
+ # seen during the fit.
670
+ snowpark_column_names = dataset.select(self.input_cols).columns
671
+ sample_pd_df.columns = snowpark_column_names
672
+
673
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
667
674
  output_df_columns = list(output_df_pd.columns)
668
675
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
669
676
  if self.sample_weight_col:
@@ -649,7 +649,14 @@ class PassiveAggressiveRegressor(BaseTransformer):
649
649
  ) -> List[str]:
650
650
  # in case the inferred output column names dimension is different
651
651
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
652
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
652
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
653
+
654
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
655
+ # seen during the fit.
656
+ snowpark_column_names = dataset.select(self.input_cols).columns
657
+ sample_pd_df.columns = snowpark_column_names
658
+
659
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
653
660
  output_df_columns = list(output_df_pd.columns)
654
661
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
655
662
  if self.sample_weight_col:
@@ -662,7 +662,14 @@ class Perceptron(BaseTransformer):
662
662
  ) -> List[str]:
663
663
  # in case the inferred output column names dimension is different
664
664
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
665
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
665
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
666
+
667
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
668
+ # seen during the fit.
669
+ snowpark_column_names = dataset.select(self.input_cols).columns
670
+ sample_pd_df.columns = snowpark_column_names
671
+
672
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
666
673
  output_df_columns = list(output_df_pd.columns)
667
674
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
668
675
  if self.sample_weight_col:
@@ -611,7 +611,14 @@ class PoissonRegressor(BaseTransformer):
611
611
  ) -> List[str]:
612
612
  # in case the inferred output column names dimension is different
613
613
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
614
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
614
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
615
+
616
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
617
+ # seen during the fit.
618
+ snowpark_column_names = dataset.select(self.input_cols).columns
619
+ sample_pd_df.columns = snowpark_column_names
620
+
621
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
615
622
  output_df_columns = list(output_df_pd.columns)
616
623
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
617
624
  if self.sample_weight_col:
@@ -667,7 +667,14 @@ class RANSACRegressor(BaseTransformer):
667
667
  ) -> List[str]:
668
668
  # in case the inferred output column names dimension is different
669
669
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
670
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
670
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
671
+
672
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
673
+ # seen during the fit.
674
+ snowpark_column_names = dataset.select(self.input_cols).columns
675
+ sample_pd_df.columns = snowpark_column_names
676
+
677
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
671
678
  output_df_columns = list(output_df_pd.columns)
672
679
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
673
680
  if self.sample_weight_col:
@@ -659,7 +659,14 @@ class Ridge(BaseTransformer):
659
659
  ) -> List[str]:
660
660
  # in case the inferred output column names dimension is different
661
661
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
662
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
662
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
663
+
664
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
665
+ # seen during the fit.
666
+ snowpark_column_names = dataset.select(self.input_cols).columns
667
+ sample_pd_df.columns = snowpark_column_names
668
+
669
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
663
670
  output_df_columns = list(output_df_pd.columns)
664
671
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
665
672
  if self.sample_weight_col:
@@ -659,7 +659,14 @@ class RidgeClassifier(BaseTransformer):
659
659
  ) -> List[str]:
660
660
  # in case the inferred output column names dimension is different
661
661
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
662
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
662
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
663
+
664
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
665
+ # seen during the fit.
666
+ snowpark_column_names = dataset.select(self.input_cols).columns
667
+ sample_pd_df.columns = snowpark_column_names
668
+
669
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
663
670
  output_df_columns = list(output_df_pd.columns)
664
671
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
665
672
  if self.sample_weight_col:
@@ -610,7 +610,14 @@ class RidgeClassifierCV(BaseTransformer):
610
610
  ) -> List[str]:
611
611
  # in case the inferred output column names dimension is different
612
612
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
613
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
613
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
614
+
615
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
616
+ # seen during the fit.
617
+ snowpark_column_names = dataset.select(self.input_cols).columns
618
+ sample_pd_df.columns = snowpark_column_names
619
+
620
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
614
621
  output_df_columns = list(output_df_pd.columns)
615
622
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
616
623
  if self.sample_weight_col:
@@ -631,7 +631,14 @@ class RidgeCV(BaseTransformer):
631
631
  ) -> List[str]:
632
632
  # in case the inferred output column names dimension is different
633
633
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
634
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
634
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
635
+
636
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
637
+ # seen during the fit.
638
+ snowpark_column_names = dataset.select(self.input_cols).columns
639
+ sample_pd_df.columns = snowpark_column_names
640
+
641
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
635
642
  output_df_columns = list(output_df_pd.columns)
636
643
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
637
644
  if self.sample_weight_col:
@@ -750,7 +750,14 @@ class SGDClassifier(BaseTransformer):
750
750
  ) -> List[str]:
751
751
  # in case the inferred output column names dimension is different
752
752
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
753
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
753
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
754
+
755
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
756
+ # seen during the fit.
757
+ snowpark_column_names = dataset.select(self.input_cols).columns
758
+ sample_pd_df.columns = snowpark_column_names
759
+
760
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
754
761
  output_df_columns = list(output_df_pd.columns)
755
762
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
756
763
  if self.sample_weight_col:
@@ -650,7 +650,14 @@ class SGDOneClassSVM(BaseTransformer):
650
650
  ) -> List[str]:
651
651
  # in case the inferred output column names dimension is different
652
652
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
653
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
653
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
654
+
655
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
656
+ # seen during the fit.
657
+ snowpark_column_names = dataset.select(self.input_cols).columns
658
+ sample_pd_df.columns = snowpark_column_names
659
+
660
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
654
661
  output_df_columns = list(output_df_pd.columns)
655
662
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
656
663
  if self.sample_weight_col:
@@ -716,7 +716,14 @@ class SGDRegressor(BaseTransformer):
716
716
  ) -> List[str]:
717
717
  # in case the inferred output column names dimension is different
718
718
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
719
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
719
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
720
+
721
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
722
+ # seen during the fit.
723
+ snowpark_column_names = dataset.select(self.input_cols).columns
724
+ sample_pd_df.columns = snowpark_column_names
725
+
726
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
720
727
  output_df_columns = list(output_df_pd.columns)
721
728
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
722
729
  if self.sample_weight_col:
@@ -618,7 +618,14 @@ class TheilSenRegressor(BaseTransformer):
618
618
  ) -> List[str]:
619
619
  # in case the inferred output column names dimension is different
620
620
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
621
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
621
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
622
+
623
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
624
+ # seen during the fit.
625
+ snowpark_column_names = dataset.select(self.input_cols).columns
626
+ sample_pd_df.columns = snowpark_column_names
627
+
628
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
622
629
  output_df_columns = list(output_df_pd.columns)
623
630
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
624
631
  if self.sample_weight_col:
@@ -644,7 +644,14 @@ class TweedieRegressor(BaseTransformer):
644
644
  ) -> List[str]:
645
645
  # in case the inferred output column names dimension is different
646
646
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
647
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
647
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
648
+
649
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
650
+ # seen during the fit.
651
+ snowpark_column_names = dataset.select(self.input_cols).columns
652
+ sample_pd_df.columns = snowpark_column_names
653
+
654
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
648
655
  output_df_columns = list(output_df_pd.columns)
649
656
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
650
657
  if self.sample_weight_col:
@@ -642,7 +642,14 @@ class Isomap(BaseTransformer):
642
642
  ) -> List[str]:
643
643
  # in case the inferred output column names dimension is different
644
644
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
645
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
645
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
646
+
647
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
648
+ # seen during the fit.
649
+ snowpark_column_names = dataset.select(self.input_cols).columns
650
+ sample_pd_df.columns = snowpark_column_names
651
+
652
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
646
653
  output_df_columns = list(output_df_pd.columns)
647
654
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
648
655
  if self.sample_weight_col:
@@ -623,7 +623,14 @@ class MDS(BaseTransformer):
623
623
  ) -> List[str]:
624
624
  # in case the inferred output column names dimension is different
625
625
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
626
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
626
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
627
+
628
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
629
+ # seen during the fit.
630
+ snowpark_column_names = dataset.select(self.input_cols).columns
631
+ sample_pd_df.columns = snowpark_column_names
632
+
633
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
627
634
  output_df_columns = list(output_df_pd.columns)
628
635
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
629
636
  if self.sample_weight_col:
@@ -625,7 +625,14 @@ class SpectralEmbedding(BaseTransformer):
625
625
  ) -> List[str]:
626
626
  # in case the inferred output column names dimension is different
627
627
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
628
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
628
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
629
+
630
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
631
+ # seen during the fit.
632
+ snowpark_column_names = dataset.select(self.input_cols).columns
633
+ sample_pd_df.columns = snowpark_column_names
634
+
635
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
629
636
  output_df_columns = list(output_df_pd.columns)
630
637
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
631
638
  if self.sample_weight_col:
@@ -684,7 +684,14 @@ class TSNE(BaseTransformer):
684
684
  ) -> List[str]:
685
685
  # in case the inferred output column names dimension is different
686
686
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
687
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
687
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
688
+
689
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
690
+ # seen during the fit.
691
+ snowpark_column_names = dataset.select(self.input_cols).columns
692
+ sample_pd_df.columns = snowpark_column_names
693
+
694
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
688
695
  output_df_columns = list(output_df_pd.columns)
689
696
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
690
697
  if self.sample_weight_col:
@@ -689,7 +689,14 @@ class BayesianGaussianMixture(BaseTransformer):
689
689
  ) -> List[str]:
690
690
  # in case the inferred output column names dimension is different
691
691
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
692
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
692
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
693
+
694
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
695
+ # seen during the fit.
696
+ snowpark_column_names = dataset.select(self.input_cols).columns
697
+ sample_pd_df.columns = snowpark_column_names
698
+
699
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
693
700
  output_df_columns = list(output_df_pd.columns)
694
701
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
695
702
  if self.sample_weight_col:
@@ -662,7 +662,14 @@ class GaussianMixture(BaseTransformer):
662
662
  ) -> List[str]:
663
663
  # in case the inferred output column names dimension is different
664
664
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
665
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
665
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
666
+
667
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
668
+ # seen during the fit.
669
+ snowpark_column_names = dataset.select(self.input_cols).columns
670
+ sample_pd_df.columns = snowpark_column_names
671
+
672
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
666
673
  output_df_columns = list(output_df_pd.columns)
667
674
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
668
675
  if self.sample_weight_col:
@@ -572,7 +572,14 @@ class OneVsOneClassifier(BaseTransformer):
572
572
  ) -> List[str]:
573
573
  # in case the inferred output column names dimension is different
574
574
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
575
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
575
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
576
+
577
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
578
+ # seen during the fit.
579
+ snowpark_column_names = dataset.select(self.input_cols).columns
580
+ sample_pd_df.columns = snowpark_column_names
581
+
582
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
576
583
  output_df_columns = list(output_df_pd.columns)
577
584
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
578
585
  if self.sample_weight_col:
@@ -581,7 +581,14 @@ class OneVsRestClassifier(BaseTransformer):
581
581
  ) -> List[str]:
582
582
  # in case the inferred output column names dimension is different
583
583
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
584
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
584
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
585
+
586
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
587
+ # seen during the fit.
588
+ snowpark_column_names = dataset.select(self.input_cols).columns
589
+ sample_pd_df.columns = snowpark_column_names
590
+
591
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
585
592
  output_df_columns = list(output_df_pd.columns)
586
593
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
587
594
  if self.sample_weight_col:
@@ -584,7 +584,14 @@ class OutputCodeClassifier(BaseTransformer):
584
584
  ) -> List[str]:
585
585
  # in case the inferred output column names dimension is different
586
586
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
587
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
587
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
588
+
589
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
590
+ # seen during the fit.
591
+ snowpark_column_names = dataset.select(self.input_cols).columns
592
+ sample_pd_df.columns = snowpark_column_names
593
+
594
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
588
595
  output_df_columns = list(output_df_pd.columns)
589
596
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
590
597
  if self.sample_weight_col:
@@ -584,7 +584,14 @@ class BernoulliNB(BaseTransformer):
584
584
  ) -> List[str]:
585
585
  # in case the inferred output column names dimension is different
586
586
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
587
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
587
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
588
+
589
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
590
+ # seen during the fit.
591
+ snowpark_column_names = dataset.select(self.input_cols).columns
592
+ sample_pd_df.columns = snowpark_column_names
593
+
594
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
588
595
  output_df_columns = list(output_df_pd.columns)
589
596
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
590
597
  if self.sample_weight_col: