snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. snowflake/cortex/_complete.py +26 -5
  2. snowflake/cortex/_sentiment.py +7 -4
  3. snowflake/cortex/_sse_client.py +81 -0
  4. snowflake/cortex/_util.py +105 -8
  5. snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
  6. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  7. snowflake/ml/dataset/dataset.py +15 -12
  8. snowflake/ml/dataset/dataset_factory.py +3 -4
  9. snowflake/ml/feature_store/access_manager.py +34 -30
  10. snowflake/ml/feature_store/feature_store.py +3 -3
  11. snowflake/ml/feature_store/feature_view.py +12 -11
  12. snowflake/ml/fileset/snowfs.py +2 -31
  13. snowflake/ml/model/_client/ops/model_ops.py +43 -0
  14. snowflake/ml/model/_client/sql/model_version.py +55 -3
  15. snowflake/ml/model/_model_composer/model_composer.py +7 -3
  16. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -1
  17. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  18. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  19. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  20. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  21. snowflake/ml/model/_signatures/builtins_handler.py +2 -1
  22. snowflake/ml/model/_signatures/core.py +13 -1
  23. snowflake/ml/model/_signatures/pandas_handler.py +2 -0
  24. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  25. snowflake/ml/model/model_signature.py +2 -0
  26. snowflake/ml/model/type_hints.py +1 -0
  27. snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
  28. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +196 -242
  29. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +161 -0
  30. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +38 -18
  31. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +82 -134
  32. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +21 -17
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -2
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +9 -2
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -2
  36. snowflake/ml/modeling/cluster/birch.py +9 -2
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -2
  38. snowflake/ml/modeling/cluster/dbscan.py +9 -2
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -2
  40. snowflake/ml/modeling/cluster/k_means.py +9 -2
  41. snowflake/ml/modeling/cluster/mean_shift.py +9 -2
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -2
  43. snowflake/ml/modeling/cluster/optics.py +9 -2
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -2
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +9 -2
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -2
  47. snowflake/ml/modeling/compose/column_transformer.py +9 -2
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -2
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -2
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +9 -2
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +9 -2
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -2
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -2
  54. snowflake/ml/modeling/covariance/min_cov_det.py +9 -2
  55. snowflake/ml/modeling/covariance/oas.py +9 -2
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -2
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -2
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +9 -2
  59. snowflake/ml/modeling/decomposition/fast_ica.py +9 -2
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +9 -2
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +9 -2
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -2
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -2
  64. snowflake/ml/modeling/decomposition/pca.py +9 -2
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +9 -2
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +9 -2
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -2
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -2
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -2
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -2
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -2
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -2
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -2
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -2
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -2
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -2
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -2
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -2
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +9 -2
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -2
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -2
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -2
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +9 -2
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +9 -2
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -2
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +9 -2
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +9 -2
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +9 -2
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +9 -2
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +9 -2
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -2
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -2
  93. snowflake/ml/modeling/framework/base.py +3 -8
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -2
  95. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -2
  96. snowflake/ml/modeling/impute/iterative_imputer.py +9 -2
  97. snowflake/ml/modeling/impute/knn_imputer.py +9 -2
  98. snowflake/ml/modeling/impute/missing_indicator.py +9 -2
  99. snowflake/ml/modeling/impute/simple_imputer.py +28 -5
  100. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -2
  101. snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -2
  102. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -2
  103. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -2
  104. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -2
  105. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -2
  106. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -2
  107. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -2
  108. snowflake/ml/modeling/linear_model/ard_regression.py +9 -2
  109. snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -2
  110. snowflake/ml/modeling/linear_model/elastic_net.py +9 -2
  111. snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -2
  112. snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -2
  113. snowflake/ml/modeling/linear_model/huber_regressor.py +9 -2
  114. snowflake/ml/modeling/linear_model/lars.py +9 -2
  115. snowflake/ml/modeling/linear_model/lars_cv.py +9 -2
  116. snowflake/ml/modeling/linear_model/lasso.py +9 -2
  117. snowflake/ml/modeling/linear_model/lasso_cv.py +9 -2
  118. snowflake/ml/modeling/linear_model/lasso_lars.py +9 -2
  119. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -2
  120. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -2
  121. snowflake/ml/modeling/linear_model/linear_regression.py +9 -2
  122. snowflake/ml/modeling/linear_model/logistic_regression.py +9 -2
  123. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -2
  124. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -2
  125. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -2
  126. snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -2
  127. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -2
  128. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -2
  129. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -2
  130. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -2
  131. snowflake/ml/modeling/linear_model/perceptron.py +9 -2
  132. snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -2
  133. snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -2
  134. snowflake/ml/modeling/linear_model/ridge.py +9 -2
  135. snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -2
  136. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -2
  137. snowflake/ml/modeling/linear_model/ridge_cv.py +9 -2
  138. snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -2
  139. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -2
  140. snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -2
  141. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -2
  142. snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -2
  143. snowflake/ml/modeling/manifold/isomap.py +9 -2
  144. snowflake/ml/modeling/manifold/mds.py +9 -2
  145. snowflake/ml/modeling/manifold/spectral_embedding.py +9 -2
  146. snowflake/ml/modeling/manifold/tsne.py +9 -2
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -2
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -2
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -2
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -2
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -2
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -2
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -2
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -2
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -2
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -2
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -2
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -2
  161. snowflake/ml/modeling/neighbors/kernel_density.py +9 -2
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -2
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -2
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -2
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -2
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -2
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -2
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -2
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -2
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -2
  171. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  172. snowflake/ml/modeling/pipeline/pipeline.py +5 -0
  173. snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
  174. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
  175. snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
  176. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
  177. snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
  178. snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
  179. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +10 -2
  180. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +8 -5
  181. snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -2
  182. snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
  183. snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
  184. snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -2
  185. snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -2
  186. snowflake/ml/modeling/svm/linear_svc.py +9 -2
  187. snowflake/ml/modeling/svm/linear_svr.py +9 -2
  188. snowflake/ml/modeling/svm/nu_svc.py +9 -2
  189. snowflake/ml/modeling/svm/nu_svr.py +9 -2
  190. snowflake/ml/modeling/svm/svc.py +9 -2
  191. snowflake/ml/modeling/svm/svr.py +9 -2
  192. snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -2
  193. snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -2
  194. snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -2
  195. snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -2
  196. snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -2
  197. snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -2
  198. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -2
  199. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -2
  200. snowflake/ml/registry/_manager/model_manager.py +59 -1
  201. snowflake/ml/registry/registry.py +10 -1
  202. snowflake/ml/version.py +1 -1
  203. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/METADATA +32 -4
  204. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/RECORD +207 -204
  205. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/LICENSE.txt +0 -0
  206. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/WHEEL +0 -0
  207. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/top_level.txt +0 -0
@@ -250,7 +250,7 @@ class IncrementalPCA(BaseTransformer):
250
250
  inspect.currentframe(), IncrementalPCA.__class__.__name__
251
251
  ),
252
252
  api_calls=[Session.call],
253
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
253
+ custom_tags={"autogen": True} if self._autogenerated else None,
254
254
  )
255
255
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
256
256
  pd_df.columns = dataset.columns
@@ -585,7 +585,14 @@ class IncrementalPCA(BaseTransformer):
585
585
  ) -> List[str]:
586
586
  # in case the inferred output column names dimension is different
587
587
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
588
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
588
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
589
+
590
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
591
+ # seen during the fit.
592
+ snowpark_column_names = dataset.select(self.input_cols).columns
593
+ sample_pd_df.columns = snowpark_column_names
594
+
595
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
589
596
  output_df_columns = list(output_df_pd.columns)
590
597
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
591
598
  if self.sample_weight_col:
@@ -346,7 +346,7 @@ class KernelPCA(BaseTransformer):
346
346
  inspect.currentframe(), KernelPCA.__class__.__name__
347
347
  ),
348
348
  api_calls=[Session.call],
349
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
349
+ custom_tags={"autogen": True} if self._autogenerated else None,
350
350
  )
351
351
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
352
352
  pd_df.columns = dataset.columns
@@ -681,7 +681,14 @@ class KernelPCA(BaseTransformer):
681
681
  ) -> List[str]:
682
682
  # in case the inferred output column names dimension is different
683
683
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
684
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
684
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
685
+
686
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
687
+ # seen during the fit.
688
+ snowpark_column_names = dataset.select(self.input_cols).columns
689
+ sample_pd_df.columns = snowpark_column_names
690
+
691
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
685
692
  output_df_columns = list(output_df_pd.columns)
686
693
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
687
694
  if self.sample_weight_col:
@@ -368,7 +368,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
368
368
  inspect.currentframe(), MiniBatchDictionaryLearning.__class__.__name__
369
369
  ),
370
370
  api_calls=[Session.call],
371
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
371
+ custom_tags={"autogen": True} if self._autogenerated else None,
372
372
  )
373
373
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
374
374
  pd_df.columns = dataset.columns
@@ -703,7 +703,14 @@ class MiniBatchDictionaryLearning(BaseTransformer):
703
703
  ) -> List[str]:
704
704
  # in case the inferred output column names dimension is different
705
705
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
706
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
706
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
707
+
708
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
709
+ # seen during the fit.
710
+ snowpark_column_names = dataset.select(self.input_cols).columns
711
+ sample_pd_df.columns = snowpark_column_names
712
+
713
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
707
714
  output_df_columns = list(output_df_pd.columns)
708
715
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
709
716
  if self.sample_weight_col:
@@ -313,7 +313,7 @@ class MiniBatchSparsePCA(BaseTransformer):
313
313
  inspect.currentframe(), MiniBatchSparsePCA.__class__.__name__
314
314
  ),
315
315
  api_calls=[Session.call],
316
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
316
+ custom_tags={"autogen": True} if self._autogenerated else None,
317
317
  )
318
318
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
319
319
  pd_df.columns = dataset.columns
@@ -648,7 +648,14 @@ class MiniBatchSparsePCA(BaseTransformer):
648
648
  ) -> List[str]:
649
649
  # in case the inferred output column names dimension is different
650
650
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
651
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
651
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
652
+
653
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
654
+ # seen during the fit.
655
+ snowpark_column_names = dataset.select(self.input_cols).columns
656
+ sample_pd_df.columns = snowpark_column_names
657
+
658
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
652
659
  output_df_columns = list(output_df_pd.columns)
653
660
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
654
661
  if self.sample_weight_col:
@@ -315,7 +315,7 @@ class PCA(BaseTransformer):
315
315
  inspect.currentframe(), PCA.__class__.__name__
316
316
  ),
317
317
  api_calls=[Session.call],
318
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
318
+ custom_tags={"autogen": True} if self._autogenerated else None,
319
319
  )
320
320
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
321
321
  pd_df.columns = dataset.columns
@@ -650,7 +650,14 @@ class PCA(BaseTransformer):
650
650
  ) -> List[str]:
651
651
  # in case the inferred output column names dimension is different
652
652
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
653
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
653
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
654
+
655
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
656
+ # seen during the fit.
657
+ snowpark_column_names = dataset.select(self.input_cols).columns
658
+ sample_pd_df.columns = snowpark_column_names
659
+
660
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
654
661
  output_df_columns = list(output_df_pd.columns)
655
662
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
656
663
  if self.sample_weight_col:
@@ -288,7 +288,7 @@ class SparsePCA(BaseTransformer):
288
288
  inspect.currentframe(), SparsePCA.__class__.__name__
289
289
  ),
290
290
  api_calls=[Session.call],
291
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
291
+ custom_tags={"autogen": True} if self._autogenerated else None,
292
292
  )
293
293
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
294
294
  pd_df.columns = dataset.columns
@@ -623,7 +623,14 @@ class SparsePCA(BaseTransformer):
623
623
  ) -> List[str]:
624
624
  # in case the inferred output column names dimension is different
625
625
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
626
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
626
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
627
+
628
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
629
+ # seen during the fit.
630
+ snowpark_column_names = dataset.select(self.input_cols).columns
631
+ sample_pd_df.columns = snowpark_column_names
632
+
633
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
627
634
  output_df_columns = list(output_df_pd.columns)
628
635
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
629
636
  if self.sample_weight_col:
@@ -269,7 +269,7 @@ class TruncatedSVD(BaseTransformer):
269
269
  inspect.currentframe(), TruncatedSVD.__class__.__name__
270
270
  ),
271
271
  api_calls=[Session.call],
272
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
272
+ custom_tags={"autogen": True} if self._autogenerated else None,
273
273
  )
274
274
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
275
275
  pd_df.columns = dataset.columns
@@ -604,7 +604,14 @@ class TruncatedSVD(BaseTransformer):
604
604
  ) -> List[str]:
605
605
  # in case the inferred output column names dimension is different
606
606
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
607
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
607
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
608
+
609
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
610
+ # seen during the fit.
611
+ snowpark_column_names = dataset.select(self.input_cols).columns
612
+ sample_pd_df.columns = snowpark_column_names
613
+
614
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
608
615
  output_df_columns = list(output_df_pd.columns)
609
616
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
610
617
  if self.sample_weight_col:
@@ -286,7 +286,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
286
286
  inspect.currentframe(), LinearDiscriminantAnalysis.__class__.__name__
287
287
  ),
288
288
  api_calls=[Session.call],
289
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
289
+ custom_tags={"autogen": True} if self._autogenerated else None,
290
290
  )
291
291
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
292
292
  pd_df.columns = dataset.columns
@@ -623,7 +623,14 @@ class LinearDiscriminantAnalysis(BaseTransformer):
623
623
  ) -> List[str]:
624
624
  # in case the inferred output column names dimension is different
625
625
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
626
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
626
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
627
+
628
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
629
+ # seen during the fit.
630
+ snowpark_column_names = dataset.select(self.input_cols).columns
631
+ sample_pd_df.columns = snowpark_column_names
632
+
633
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
627
634
  output_df_columns = list(output_df_pd.columns)
628
635
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
629
636
  if self.sample_weight_col:
@@ -248,7 +248,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
248
248
  inspect.currentframe(), QuadraticDiscriminantAnalysis.__class__.__name__
249
249
  ),
250
250
  api_calls=[Session.call],
251
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
251
+ custom_tags={"autogen": True} if self._autogenerated else None,
252
252
  )
253
253
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
254
254
  pd_df.columns = dataset.columns
@@ -581,7 +581,14 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
581
581
  ) -> List[str]:
582
582
  # in case the inferred output column names dimension is different
583
583
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
584
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
584
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
585
+
586
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
587
+ # seen during the fit.
588
+ snowpark_column_names = dataset.select(self.input_cols).columns
589
+ sample_pd_df.columns = snowpark_column_names
590
+
591
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
585
592
  output_df_columns = list(output_df_pd.columns)
586
593
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
587
594
  if self.sample_weight_col:
@@ -273,7 +273,7 @@ class AdaBoostClassifier(BaseTransformer):
273
273
  inspect.currentframe(), AdaBoostClassifier.__class__.__name__
274
274
  ),
275
275
  api_calls=[Session.call],
276
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
276
+ custom_tags={"autogen": True} if self._autogenerated else None,
277
277
  )
278
278
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
279
279
  pd_df.columns = dataset.columns
@@ -606,7 +606,14 @@ class AdaBoostClassifier(BaseTransformer):
606
606
  ) -> List[str]:
607
607
  # in case the inferred output column names dimension is different
608
608
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
609
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
609
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
610
+
611
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
612
+ # seen during the fit.
613
+ snowpark_column_names = dataset.select(self.input_cols).columns
614
+ sample_pd_df.columns = snowpark_column_names
615
+
616
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
610
617
  output_df_columns = list(output_df_pd.columns)
611
618
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
612
619
  if self.sample_weight_col:
@@ -270,7 +270,7 @@ class AdaBoostRegressor(BaseTransformer):
270
270
  inspect.currentframe(), AdaBoostRegressor.__class__.__name__
271
271
  ),
272
272
  api_calls=[Session.call],
273
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
273
+ custom_tags={"autogen": True} if self._autogenerated else None,
274
274
  )
275
275
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
276
276
  pd_df.columns = dataset.columns
@@ -603,7 +603,14 @@ class AdaBoostRegressor(BaseTransformer):
603
603
  ) -> List[str]:
604
604
  # in case the inferred output column names dimension is different
605
605
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
606
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
606
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
607
+
608
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
609
+ # seen during the fit.
610
+ snowpark_column_names = dataset.select(self.input_cols).columns
611
+ sample_pd_df.columns = snowpark_column_names
612
+
613
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
607
614
  output_df_columns = list(output_df_pd.columns)
608
615
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
609
616
  if self.sample_weight_col:
@@ -305,7 +305,7 @@ class BaggingClassifier(BaseTransformer):
305
305
  inspect.currentframe(), BaggingClassifier.__class__.__name__
306
306
  ),
307
307
  api_calls=[Session.call],
308
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
308
+ custom_tags={"autogen": True} if self._autogenerated else None,
309
309
  )
310
310
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
311
311
  pd_df.columns = dataset.columns
@@ -638,7 +638,14 @@ class BaggingClassifier(BaseTransformer):
638
638
  ) -> List[str]:
639
639
  # in case the inferred output column names dimension is different
640
640
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
641
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
641
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
642
+
643
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
644
+ # seen during the fit.
645
+ snowpark_column_names = dataset.select(self.input_cols).columns
646
+ sample_pd_df.columns = snowpark_column_names
647
+
648
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
642
649
  output_df_columns = list(output_df_pd.columns)
643
650
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
644
651
  if self.sample_weight_col:
@@ -305,7 +305,7 @@ class BaggingRegressor(BaseTransformer):
305
305
  inspect.currentframe(), BaggingRegressor.__class__.__name__
306
306
  ),
307
307
  api_calls=[Session.call],
308
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
308
+ custom_tags={"autogen": True} if self._autogenerated else None,
309
309
  )
310
310
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
311
311
  pd_df.columns = dataset.columns
@@ -638,7 +638,14 @@ class BaggingRegressor(BaseTransformer):
638
638
  ) -> List[str]:
639
639
  # in case the inferred output column names dimension is different
640
640
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
641
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
641
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
642
+
643
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
644
+ # seen during the fit.
645
+ snowpark_column_names = dataset.select(self.input_cols).columns
646
+ sample_pd_df.columns = snowpark_column_names
647
+
648
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
642
649
  output_df_columns = list(output_df_pd.columns)
643
650
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
644
651
  if self.sample_weight_col:
@@ -408,7 +408,7 @@ class ExtraTreesClassifier(BaseTransformer):
408
408
  inspect.currentframe(), ExtraTreesClassifier.__class__.__name__
409
409
  ),
410
410
  api_calls=[Session.call],
411
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
411
+ custom_tags={"autogen": True} if self._autogenerated else None,
412
412
  )
413
413
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
414
414
  pd_df.columns = dataset.columns
@@ -741,7 +741,14 @@ class ExtraTreesClassifier(BaseTransformer):
741
741
  ) -> List[str]:
742
742
  # in case the inferred output column names dimension is different
743
743
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
744
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
744
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
745
+
746
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
747
+ # seen during the fit.
748
+ snowpark_column_names = dataset.select(self.input_cols).columns
749
+ sample_pd_df.columns = snowpark_column_names
750
+
751
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
745
752
  output_df_columns = list(output_df_pd.columns)
746
753
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
747
754
  if self.sample_weight_col:
@@ -387,7 +387,7 @@ class ExtraTreesRegressor(BaseTransformer):
387
387
  inspect.currentframe(), ExtraTreesRegressor.__class__.__name__
388
388
  ),
389
389
  api_calls=[Session.call],
390
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
390
+ custom_tags={"autogen": True} if self._autogenerated else None,
391
391
  )
392
392
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
393
393
  pd_df.columns = dataset.columns
@@ -720,7 +720,14 @@ class ExtraTreesRegressor(BaseTransformer):
720
720
  ) -> List[str]:
721
721
  # in case the inferred output column names dimension is different
722
722
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
723
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
723
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
724
+
725
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
726
+ # seen during the fit.
727
+ snowpark_column_names = dataset.select(self.input_cols).columns
728
+ sample_pd_df.columns = snowpark_column_names
729
+
730
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
724
731
  output_df_columns = list(output_df_pd.columns)
725
732
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
726
733
  if self.sample_weight_col:
@@ -420,7 +420,7 @@ class GradientBoostingClassifier(BaseTransformer):
420
420
  inspect.currentframe(), GradientBoostingClassifier.__class__.__name__
421
421
  ),
422
422
  api_calls=[Session.call],
423
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
423
+ custom_tags={"autogen": True} if self._autogenerated else None,
424
424
  )
425
425
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
426
426
  pd_df.columns = dataset.columns
@@ -753,7 +753,14 @@ class GradientBoostingClassifier(BaseTransformer):
753
753
  ) -> List[str]:
754
754
  # in case the inferred output column names dimension is different
755
755
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
756
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
756
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
757
+
758
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
759
+ # seen during the fit.
760
+ snowpark_column_names = dataset.select(self.input_cols).columns
761
+ sample_pd_df.columns = snowpark_column_names
762
+
763
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
757
764
  output_df_columns = list(output_df_pd.columns)
758
765
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
759
766
  if self.sample_weight_col:
@@ -429,7 +429,7 @@ class GradientBoostingRegressor(BaseTransformer):
429
429
  inspect.currentframe(), GradientBoostingRegressor.__class__.__name__
430
430
  ),
431
431
  api_calls=[Session.call],
432
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
432
+ custom_tags={"autogen": True} if self._autogenerated else None,
433
433
  )
434
434
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
435
435
  pd_df.columns = dataset.columns
@@ -762,7 +762,14 @@ class GradientBoostingRegressor(BaseTransformer):
762
762
  ) -> List[str]:
763
763
  # in case the inferred output column names dimension is different
764
764
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
765
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
765
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
766
+
767
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
768
+ # seen during the fit.
769
+ snowpark_column_names = dataset.select(self.input_cols).columns
770
+ sample_pd_df.columns = snowpark_column_names
771
+
772
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
766
773
  output_df_columns = list(output_df_pd.columns)
767
774
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
768
775
  if self.sample_weight_col:
@@ -401,7 +401,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
401
401
  inspect.currentframe(), HistGradientBoostingClassifier.__class__.__name__
402
402
  ),
403
403
  api_calls=[Session.call],
404
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
404
+ custom_tags={"autogen": True} if self._autogenerated else None,
405
405
  )
406
406
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
407
407
  pd_df.columns = dataset.columns
@@ -734,7 +734,14 @@ class HistGradientBoostingClassifier(BaseTransformer):
734
734
  ) -> List[str]:
735
735
  # in case the inferred output column names dimension is different
736
736
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
737
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
737
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
738
+
739
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
740
+ # seen during the fit.
741
+ snowpark_column_names = dataset.select(self.input_cols).columns
742
+ sample_pd_df.columns = snowpark_column_names
743
+
744
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
738
745
  output_df_columns = list(output_df_pd.columns)
739
746
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
740
747
  if self.sample_weight_col:
@@ -392,7 +392,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
392
392
  inspect.currentframe(), HistGradientBoostingRegressor.__class__.__name__
393
393
  ),
394
394
  api_calls=[Session.call],
395
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
395
+ custom_tags={"autogen": True} if self._autogenerated else None,
396
396
  )
397
397
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
398
398
  pd_df.columns = dataset.columns
@@ -725,7 +725,14 @@ class HistGradientBoostingRegressor(BaseTransformer):
725
725
  ) -> List[str]:
726
726
  # in case the inferred output column names dimension is different
727
727
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
728
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
728
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
729
+
730
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
731
+ # seen during the fit.
732
+ snowpark_column_names = dataset.select(self.input_cols).columns
733
+ sample_pd_df.columns = snowpark_column_names
734
+
735
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
729
736
  output_df_columns = list(output_df_pd.columns)
730
737
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
731
738
  if self.sample_weight_col:
@@ -292,7 +292,7 @@ class IsolationForest(BaseTransformer):
292
292
  inspect.currentframe(), IsolationForest.__class__.__name__
293
293
  ),
294
294
  api_calls=[Session.call],
295
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
295
+ custom_tags={"autogen": True} if self._autogenerated else None,
296
296
  )
297
297
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
298
298
  pd_df.columns = dataset.columns
@@ -627,7 +627,14 @@ class IsolationForest(BaseTransformer):
627
627
  ) -> List[str]:
628
628
  # in case the inferred output column names dimension is different
629
629
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
630
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
630
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
631
+
632
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
633
+ # seen during the fit.
634
+ snowpark_column_names = dataset.select(self.input_cols).columns
635
+ sample_pd_df.columns = snowpark_column_names
636
+
637
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
631
638
  output_df_columns = list(output_df_pd.columns)
632
639
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
633
640
  if self.sample_weight_col:
@@ -404,7 +404,7 @@ class RandomForestClassifier(BaseTransformer):
404
404
  inspect.currentframe(), RandomForestClassifier.__class__.__name__
405
405
  ),
406
406
  api_calls=[Session.call],
407
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
407
+ custom_tags={"autogen": True} if self._autogenerated else None,
408
408
  )
409
409
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
410
410
  pd_df.columns = dataset.columns
@@ -737,7 +737,14 @@ class RandomForestClassifier(BaseTransformer):
737
737
  ) -> List[str]:
738
738
  # in case the inferred output column names dimension is different
739
739
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
740
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
740
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
741
+
742
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
743
+ # seen during the fit.
744
+ snowpark_column_names = dataset.select(self.input_cols).columns
745
+ sample_pd_df.columns = snowpark_column_names
746
+
747
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
741
748
  output_df_columns = list(output_df_pd.columns)
742
749
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
743
750
  if self.sample_weight_col:
@@ -383,7 +383,7 @@ class RandomForestRegressor(BaseTransformer):
383
383
  inspect.currentframe(), RandomForestRegressor.__class__.__name__
384
384
  ),
385
385
  api_calls=[Session.call],
386
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
386
+ custom_tags={"autogen": True} if self._autogenerated else None,
387
387
  )
388
388
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
389
389
  pd_df.columns = dataset.columns
@@ -716,7 +716,14 @@ class RandomForestRegressor(BaseTransformer):
716
716
  ) -> List[str]:
717
717
  # in case the inferred output column names dimension is different
718
718
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
719
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
719
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
720
+
721
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
722
+ # seen during the fit.
723
+ snowpark_column_names = dataset.select(self.input_cols).columns
724
+ sample_pd_df.columns = snowpark_column_names
725
+
726
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
720
727
  output_df_columns = list(output_df_pd.columns)
721
728
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
722
729
  if self.sample_weight_col:
@@ -284,7 +284,7 @@ class StackingRegressor(BaseTransformer):
284
284
  inspect.currentframe(), StackingRegressor.__class__.__name__
285
285
  ),
286
286
  api_calls=[Session.call],
287
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
287
+ custom_tags={"autogen": True} if self._autogenerated else None,
288
288
  )
289
289
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
290
290
  pd_df.columns = dataset.columns
@@ -621,7 +621,14 @@ class StackingRegressor(BaseTransformer):
621
621
  ) -> List[str]:
622
622
  # in case the inferred output column names dimension is different
623
623
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
624
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
624
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
625
+
626
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
627
+ # seen during the fit.
628
+ snowpark_column_names = dataset.select(self.input_cols).columns
629
+ sample_pd_df.columns = snowpark_column_names
630
+
631
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
625
632
  output_df_columns = list(output_df_pd.columns)
626
633
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
627
634
  if self.sample_weight_col:
@@ -266,7 +266,7 @@ class VotingClassifier(BaseTransformer):
266
266
  inspect.currentframe(), VotingClassifier.__class__.__name__
267
267
  ),
268
268
  api_calls=[Session.call],
269
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
269
+ custom_tags={"autogen": True} if self._autogenerated else None,
270
270
  )
271
271
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
272
272
  pd_df.columns = dataset.columns
@@ -603,7 +603,14 @@ class VotingClassifier(BaseTransformer):
603
603
  ) -> List[str]:
604
604
  # in case the inferred output column names dimension is different
605
605
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
606
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
606
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
607
+
608
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
609
+ # seen during the fit.
610
+ snowpark_column_names = dataset.select(self.input_cols).columns
611
+ sample_pd_df.columns = snowpark_column_names
612
+
613
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
607
614
  output_df_columns = list(output_df_pd.columns)
608
615
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
609
616
  if self.sample_weight_col: