snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. snowflake/cortex/_sentiment.py +7 -4
  2. snowflake/ml/_internal/env_utils.py +6 -0
  3. snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
  4. snowflake/ml/_internal/telemetry.py +1 -0
  5. snowflake/ml/_internal/utils/identifier.py +1 -1
  6. snowflake/ml/_internal/utils/sql_identifier.py +14 -1
  7. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  8. snowflake/ml/dataset/__init__.py +2 -1
  9. snowflake/ml/dataset/dataset.py +4 -3
  10. snowflake/ml/dataset/dataset_reader.py +5 -8
  11. snowflake/ml/feature_store/__init__.py +6 -0
  12. snowflake/ml/feature_store/access_manager.py +283 -0
  13. snowflake/ml/feature_store/feature_store.py +160 -100
  14. snowflake/ml/feature_store/feature_view.py +30 -19
  15. snowflake/ml/fileset/embedded_stage_fs.py +15 -12
  16. snowflake/ml/fileset/snowfs.py +2 -30
  17. snowflake/ml/fileset/stage_fs.py +25 -7
  18. snowflake/ml/model/_client/model/model_impl.py +46 -39
  19. snowflake/ml/model/_client/model/model_version_impl.py +24 -2
  20. snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
  21. snowflake/ml/model/_client/ops/model_ops.py +174 -16
  22. snowflake/ml/model/_client/sql/_base.py +34 -0
  23. snowflake/ml/model/_client/sql/model.py +32 -39
  24. snowflake/ml/model/_client/sql/model_version.py +111 -42
  25. snowflake/ml/model/_client/sql/stage.py +6 -32
  26. snowflake/ml/model/_client/sql/tag.py +32 -56
  27. snowflake/ml/model/_model_composer/model_composer.py +8 -4
  28. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
  29. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  30. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  31. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +90 -142
  32. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
  33. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +81 -3
  34. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
  35. snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
  36. snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
  37. snowflake/ml/modeling/cluster/birch.py +8 -1
  38. snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
  39. snowflake/ml/modeling/cluster/dbscan.py +8 -1
  40. snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
  41. snowflake/ml/modeling/cluster/k_means.py +8 -1
  42. snowflake/ml/modeling/cluster/mean_shift.py +8 -1
  43. snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
  44. snowflake/ml/modeling/cluster/optics.py +8 -1
  45. snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
  46. snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
  47. snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
  48. snowflake/ml/modeling/compose/column_transformer.py +8 -1
  49. snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
  50. snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
  51. snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
  52. snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
  53. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
  54. snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
  55. snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
  56. snowflake/ml/modeling/covariance/oas.py +8 -1
  57. snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
  58. snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
  59. snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
  60. snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
  61. snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
  62. snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
  63. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
  64. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
  65. snowflake/ml/modeling/decomposition/pca.py +8 -1
  66. snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
  67. snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
  68. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
  69. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
  70. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
  71. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
  72. snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
  73. snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
  74. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
  75. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
  76. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
  77. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
  79. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
  80. snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
  81. snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
  82. snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
  83. snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
  84. snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
  85. snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
  86. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
  87. snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
  88. snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
  89. snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
  90. snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
  91. snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
  92. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
  93. snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
  94. snowflake/ml/modeling/framework/base.py +4 -3
  95. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
  96. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
  97. snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
  98. snowflake/ml/modeling/impute/knn_imputer.py +8 -1
  99. snowflake/ml/modeling/impute/missing_indicator.py +8 -1
  100. snowflake/ml/modeling/impute/simple_imputer.py +21 -2
  101. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
  102. snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
  103. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
  104. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
  105. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
  106. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
  107. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
  108. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
  109. snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
  110. snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
  111. snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
  112. snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
  113. snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
  114. snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
  115. snowflake/ml/modeling/linear_model/lars.py +8 -1
  116. snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
  117. snowflake/ml/modeling/linear_model/lasso.py +8 -1
  118. snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
  119. snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
  120. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
  121. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
  122. snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
  123. snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
  124. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
  125. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
  126. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
  127. snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
  128. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
  129. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
  130. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
  131. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
  132. snowflake/ml/modeling/linear_model/perceptron.py +8 -1
  133. snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
  134. snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
  135. snowflake/ml/modeling/linear_model/ridge.py +8 -1
  136. snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
  137. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
  138. snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
  139. snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
  140. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
  141. snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
  142. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
  143. snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
  144. snowflake/ml/modeling/manifold/isomap.py +8 -1
  145. snowflake/ml/modeling/manifold/mds.py +8 -1
  146. snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
  147. snowflake/ml/modeling/manifold/tsne.py +8 -1
  148. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
  149. snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
  150. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
  151. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
  152. snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
  153. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
  154. snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
  155. snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
  156. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
  157. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
  158. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
  159. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
  160. snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
  161. snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
  162. snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
  163. snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
  164. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
  165. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
  166. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
  167. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
  168. snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
  169. snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
  170. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  171. snowflake/ml/modeling/pipeline/pipeline.py +27 -7
  172. snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
  173. snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
  174. snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
  175. snowflake/ml/modeling/svm/linear_svc.py +8 -1
  176. snowflake/ml/modeling/svm/linear_svr.py +8 -1
  177. snowflake/ml/modeling/svm/nu_svc.py +8 -1
  178. snowflake/ml/modeling/svm/nu_svr.py +8 -1
  179. snowflake/ml/modeling/svm/svc.py +8 -1
  180. snowflake/ml/modeling/svm/svr.py +8 -1
  181. snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
  182. snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
  183. snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
  184. snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
  185. snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
  186. snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
  187. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
  188. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
  189. snowflake/ml/registry/_manager/model_manager.py +95 -8
  190. snowflake/ml/registry/registry.py +10 -1
  191. snowflake/ml/version.py +1 -1
  192. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +66 -10
  193. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +196 -192
  194. snowflake/ml/_internal/lineage/dataset_dataframe.py +0 -44
  195. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
  196. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
  197. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -606,7 +606,14 @@ class KernelRidge(BaseTransformer):
606
606
  ) -> List[str]:
607
607
  # in case the inferred output column names dimension is different
608
608
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
609
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
609
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
610
+
611
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
612
+ # seen during the fit.
613
+ snowpark_column_names = dataset.select(self.input_cols).columns
614
+ sample_pd_df.columns = snowpark_column_names
615
+
616
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
610
617
  output_df_columns = list(output_df_pd.columns)
611
618
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
612
619
  if self.sample_weight_col:
@@ -595,7 +595,14 @@ class LGBMClassifier(BaseTransformer):
595
595
  ) -> List[str]:
596
596
  # in case the inferred output column names dimension is different
597
597
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
598
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
598
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
599
+
600
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
601
+ # seen during the fit.
602
+ snowpark_column_names = dataset.select(self.input_cols).columns
603
+ sample_pd_df.columns = snowpark_column_names
604
+
605
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
599
606
  output_df_columns = list(output_df_pd.columns)
600
607
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
601
608
  if self.sample_weight_col:
@@ -595,7 +595,14 @@ class LGBMRegressor(BaseTransformer):
595
595
  ) -> List[str]:
596
596
  # in case the inferred output column names dimension is different
597
597
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
598
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
598
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
599
+
600
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
601
+ # seen during the fit.
602
+ snowpark_column_names = dataset.select(self.input_cols).columns
603
+ sample_pd_df.columns = snowpark_column_names
604
+
605
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
599
606
  output_df_columns = list(output_df_pd.columns)
600
607
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
601
608
  if self.sample_weight_col:
@@ -620,7 +620,14 @@ class ARDRegression(BaseTransformer):
620
620
  ) -> List[str]:
621
621
  # in case the inferred output column names dimension is different
622
622
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
623
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
623
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
624
+
625
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
626
+ # seen during the fit.
627
+ snowpark_column_names = dataset.select(self.input_cols).columns
628
+ sample_pd_df.columns = snowpark_column_names
629
+
630
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
624
631
  output_df_columns = list(output_df_pd.columns)
625
632
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
626
633
  if self.sample_weight_col:
@@ -631,7 +631,14 @@ class BayesianRidge(BaseTransformer):
631
631
  ) -> List[str]:
632
632
  # in case the inferred output column names dimension is different
633
633
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
634
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
634
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
635
+
636
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
637
+ # seen during the fit.
638
+ snowpark_column_names = dataset.select(self.input_cols).columns
639
+ sample_pd_df.columns = snowpark_column_names
640
+
641
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
635
642
  output_df_columns = list(output_df_pd.columns)
636
643
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
637
644
  if self.sample_weight_col:
@@ -630,7 +630,14 @@ class ElasticNet(BaseTransformer):
630
630
  ) -> List[str]:
631
631
  # in case the inferred output column names dimension is different
632
632
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
633
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
633
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
634
+
635
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
636
+ # seen during the fit.
637
+ snowpark_column_names = dataset.select(self.input_cols).columns
638
+ sample_pd_df.columns = snowpark_column_names
639
+
640
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
634
641
  output_df_columns = list(output_df_pd.columns)
635
642
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
636
643
  if self.sample_weight_col:
@@ -666,7 +666,14 @@ class ElasticNetCV(BaseTransformer):
666
666
  ) -> List[str]:
667
667
  # in case the inferred output column names dimension is different
668
668
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
669
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
669
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
670
+
671
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
672
+ # seen during the fit.
673
+ snowpark_column_names = dataset.select(self.input_cols).columns
674
+ sample_pd_df.columns = snowpark_column_names
675
+
676
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
670
677
  output_df_columns = list(output_df_pd.columns)
671
678
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
672
679
  if self.sample_weight_col:
@@ -611,7 +611,14 @@ class GammaRegressor(BaseTransformer):
611
611
  ) -> List[str]:
612
612
  # in case the inferred output column names dimension is different
613
613
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
614
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
614
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
615
+
616
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
617
+ # seen during the fit.
618
+ snowpark_column_names = dataset.select(self.input_cols).columns
619
+ sample_pd_df.columns = snowpark_column_names
620
+
621
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
615
622
  output_df_columns = list(output_df_pd.columns)
616
623
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
617
624
  if self.sample_weight_col:
@@ -594,7 +594,14 @@ class HuberRegressor(BaseTransformer):
594
594
  ) -> List[str]:
595
595
  # in case the inferred output column names dimension is different
596
596
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
597
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
597
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
598
+
599
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
600
+ # seen during the fit.
601
+ snowpark_column_names = dataset.select(self.input_cols).columns
602
+ sample_pd_df.columns = snowpark_column_names
603
+
604
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
598
605
  output_df_columns = list(output_df_pd.columns)
599
606
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
600
607
  if self.sample_weight_col:
@@ -623,7 +623,14 @@ class Lars(BaseTransformer):
623
623
  ) -> List[str]:
624
624
  # in case the inferred output column names dimension is different
625
625
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
626
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
626
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
627
+
628
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
629
+ # seen during the fit.
630
+ snowpark_column_names = dataset.select(self.input_cols).columns
631
+ sample_pd_df.columns = snowpark_column_names
632
+
633
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
627
634
  output_df_columns = list(output_df_pd.columns)
628
635
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
629
636
  if self.sample_weight_col:
@@ -631,7 +631,14 @@ class LarsCV(BaseTransformer):
631
631
  ) -> List[str]:
632
632
  # in case the inferred output column names dimension is different
633
633
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
634
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
634
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
635
+
636
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
637
+ # seen during the fit.
638
+ snowpark_column_names = dataset.select(self.input_cols).columns
639
+ sample_pd_df.columns = snowpark_column_names
640
+
641
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
635
642
  output_df_columns = list(output_df_pd.columns)
636
643
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
637
644
  if self.sample_weight_col:
@@ -624,7 +624,14 @@ class Lasso(BaseTransformer):
624
624
  ) -> List[str]:
625
625
  # in case the inferred output column names dimension is different
626
626
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
627
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
627
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
628
+
629
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
630
+ # seen during the fit.
631
+ snowpark_column_names = dataset.select(self.input_cols).columns
632
+ sample_pd_df.columns = snowpark_column_names
633
+
634
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
628
635
  output_df_columns = list(output_df_pd.columns)
629
636
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
630
637
  if self.sample_weight_col:
@@ -652,7 +652,14 @@ class LassoCV(BaseTransformer):
652
652
  ) -> List[str]:
653
653
  # in case the inferred output column names dimension is different
654
654
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
655
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
655
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
656
+
657
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
658
+ # seen during the fit.
659
+ snowpark_column_names = dataset.select(self.input_cols).columns
660
+ sample_pd_df.columns = snowpark_column_names
661
+
662
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
656
663
  output_df_columns = list(output_df_pd.columns)
657
664
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
658
665
  if self.sample_weight_col:
@@ -644,7 +644,14 @@ class LassoLars(BaseTransformer):
644
644
  ) -> List[str]:
645
645
  # in case the inferred output column names dimension is different
646
646
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
647
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
647
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
648
+
649
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
650
+ # seen during the fit.
651
+ snowpark_column_names = dataset.select(self.input_cols).columns
652
+ sample_pd_df.columns = snowpark_column_names
653
+
654
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
648
655
  output_df_columns = list(output_df_pd.columns)
649
656
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
650
657
  if self.sample_weight_col:
@@ -645,7 +645,14 @@ class LassoLarsCV(BaseTransformer):
645
645
  ) -> List[str]:
646
646
  # in case the inferred output column names dimension is different
647
647
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
648
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
648
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
649
+
650
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
651
+ # seen during the fit.
652
+ snowpark_column_names = dataset.select(self.input_cols).columns
653
+ sample_pd_df.columns = snowpark_column_names
654
+
655
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
649
656
  output_df_columns = list(output_df_pd.columns)
650
657
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
651
658
  if self.sample_weight_col:
@@ -628,7 +628,14 @@ class LassoLarsIC(BaseTransformer):
628
628
  ) -> List[str]:
629
629
  # in case the inferred output column names dimension is different
630
630
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
631
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
631
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
632
+
633
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
634
+ # seen during the fit.
635
+ snowpark_column_names = dataset.select(self.input_cols).columns
636
+ sample_pd_df.columns = snowpark_column_names
637
+
638
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
632
639
  output_df_columns = list(output_df_pd.columns)
633
640
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
634
641
  if self.sample_weight_col:
@@ -581,7 +581,14 @@ class LinearRegression(BaseTransformer):
581
581
  ) -> List[str]:
582
582
  # in case the inferred output column names dimension is different
583
583
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
584
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
584
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
585
+
586
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
587
+ # seen during the fit.
588
+ snowpark_column_names = dataset.select(self.input_cols).columns
589
+ sample_pd_df.columns = snowpark_column_names
590
+
591
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
585
592
  output_df_columns = list(output_df_pd.columns)
586
593
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
587
594
  if self.sample_weight_col:
@@ -695,7 +695,14 @@ class LogisticRegression(BaseTransformer):
695
695
  ) -> List[str]:
696
696
  # in case the inferred output column names dimension is different
697
697
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
698
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
698
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
699
+
700
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
701
+ # seen during the fit.
702
+ snowpark_column_names = dataset.select(self.input_cols).columns
703
+ sample_pd_df.columns = snowpark_column_names
704
+
705
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
699
706
  output_df_columns = list(output_df_pd.columns)
700
707
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
701
708
  if self.sample_weight_col:
@@ -716,7 +716,14 @@ class LogisticRegressionCV(BaseTransformer):
716
716
  ) -> List[str]:
717
717
  # in case the inferred output column names dimension is different
718
718
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
719
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
719
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
720
+
721
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
722
+ # seen during the fit.
723
+ snowpark_column_names = dataset.select(self.input_cols).columns
724
+ sample_pd_df.columns = snowpark_column_names
725
+
726
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
720
727
  output_df_columns = list(output_df_pd.columns)
721
728
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
722
729
  if self.sample_weight_col:
@@ -614,7 +614,14 @@ class MultiTaskElasticNet(BaseTransformer):
614
614
  ) -> List[str]:
615
615
  # in case the inferred output column names dimension is different
616
616
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
617
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
617
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
618
+
619
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
620
+ # seen during the fit.
621
+ snowpark_column_names = dataset.select(self.input_cols).columns
622
+ sample_pd_df.columns = snowpark_column_names
623
+
624
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
618
625
  output_df_columns = list(output_df_pd.columns)
619
626
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
620
627
  if self.sample_weight_col:
@@ -655,7 +655,14 @@ class MultiTaskElasticNetCV(BaseTransformer):
655
655
  ) -> List[str]:
656
656
  # in case the inferred output column names dimension is different
657
657
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
658
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
658
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
659
+
660
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
661
+ # seen during the fit.
662
+ snowpark_column_names = dataset.select(self.input_cols).columns
663
+ sample_pd_df.columns = snowpark_column_names
664
+
665
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
659
666
  output_df_columns = list(output_df_pd.columns)
660
667
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
661
668
  if self.sample_weight_col:
@@ -606,7 +606,14 @@ class MultiTaskLasso(BaseTransformer):
606
606
  ) -> List[str]:
607
607
  # in case the inferred output column names dimension is different
608
608
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
609
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
609
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
610
+
611
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
612
+ # seen during the fit.
613
+ snowpark_column_names = dataset.select(self.input_cols).columns
614
+ sample_pd_df.columns = snowpark_column_names
615
+
616
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
610
617
  output_df_columns = list(output_df_pd.columns)
611
618
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
612
619
  if self.sample_weight_col:
@@ -641,7 +641,14 @@ class MultiTaskLassoCV(BaseTransformer):
641
641
  ) -> List[str]:
642
642
  # in case the inferred output column names dimension is different
643
643
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
644
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
644
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
645
+
646
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
647
+ # seen during the fit.
648
+ snowpark_column_names = dataset.select(self.input_cols).columns
649
+ sample_pd_df.columns = snowpark_column_names
650
+
651
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
645
652
  output_df_columns = list(output_df_pd.columns)
646
653
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
647
654
  if self.sample_weight_col:
@@ -589,7 +589,14 @@ class OrthogonalMatchingPursuit(BaseTransformer):
589
589
  ) -> List[str]:
590
590
  # in case the inferred output column names dimension is different
591
591
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
592
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
592
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
593
+
594
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
595
+ # seen during the fit.
596
+ snowpark_column_names = dataset.select(self.input_cols).columns
597
+ sample_pd_df.columns = snowpark_column_names
598
+
599
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
593
600
  output_df_columns = list(output_df_pd.columns)
594
601
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
595
602
  if self.sample_weight_col:
@@ -663,7 +663,14 @@ class PassiveAggressiveClassifier(BaseTransformer):
663
663
  ) -> List[str]:
664
664
  # in case the inferred output column names dimension is different
665
665
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
666
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
666
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
667
+
668
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
669
+ # seen during the fit.
670
+ snowpark_column_names = dataset.select(self.input_cols).columns
671
+ sample_pd_df.columns = snowpark_column_names
672
+
673
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
667
674
  output_df_columns = list(output_df_pd.columns)
668
675
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
669
676
  if self.sample_weight_col:
@@ -649,7 +649,14 @@ class PassiveAggressiveRegressor(BaseTransformer):
649
649
  ) -> List[str]:
650
650
  # in case the inferred output column names dimension is different
651
651
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
652
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
652
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
653
+
654
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
655
+ # seen during the fit.
656
+ snowpark_column_names = dataset.select(self.input_cols).columns
657
+ sample_pd_df.columns = snowpark_column_names
658
+
659
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
653
660
  output_df_columns = list(output_df_pd.columns)
654
661
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
655
662
  if self.sample_weight_col:
@@ -662,7 +662,14 @@ class Perceptron(BaseTransformer):
662
662
  ) -> List[str]:
663
663
  # in case the inferred output column names dimension is different
664
664
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
665
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
665
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
666
+
667
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
668
+ # seen during the fit.
669
+ snowpark_column_names = dataset.select(self.input_cols).columns
670
+ sample_pd_df.columns = snowpark_column_names
671
+
672
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
666
673
  output_df_columns = list(output_df_pd.columns)
667
674
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
668
675
  if self.sample_weight_col:
@@ -611,7 +611,14 @@ class PoissonRegressor(BaseTransformer):
611
611
  ) -> List[str]:
612
612
  # in case the inferred output column names dimension is different
613
613
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
614
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
614
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
615
+
616
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
617
+ # seen during the fit.
618
+ snowpark_column_names = dataset.select(self.input_cols).columns
619
+ sample_pd_df.columns = snowpark_column_names
620
+
621
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
615
622
  output_df_columns = list(output_df_pd.columns)
616
623
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
617
624
  if self.sample_weight_col:
@@ -667,7 +667,14 @@ class RANSACRegressor(BaseTransformer):
667
667
  ) -> List[str]:
668
668
  # in case the inferred output column names dimension is different
669
669
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
670
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
670
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
671
+
672
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
673
+ # seen during the fit.
674
+ snowpark_column_names = dataset.select(self.input_cols).columns
675
+ sample_pd_df.columns = snowpark_column_names
676
+
677
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
671
678
  output_df_columns = list(output_df_pd.columns)
672
679
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
673
680
  if self.sample_weight_col:
@@ -659,7 +659,14 @@ class Ridge(BaseTransformer):
659
659
  ) -> List[str]:
660
660
  # in case the inferred output column names dimension is different
661
661
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
662
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
662
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
663
+
664
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
665
+ # seen during the fit.
666
+ snowpark_column_names = dataset.select(self.input_cols).columns
667
+ sample_pd_df.columns = snowpark_column_names
668
+
669
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
663
670
  output_df_columns = list(output_df_pd.columns)
664
671
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
665
672
  if self.sample_weight_col:
@@ -659,7 +659,14 @@ class RidgeClassifier(BaseTransformer):
659
659
  ) -> List[str]:
660
660
  # in case the inferred output column names dimension is different
661
661
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
662
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
662
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
663
+
664
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
665
+ # seen during the fit.
666
+ snowpark_column_names = dataset.select(self.input_cols).columns
667
+ sample_pd_df.columns = snowpark_column_names
668
+
669
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
663
670
  output_df_columns = list(output_df_pd.columns)
664
671
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
665
672
  if self.sample_weight_col:
@@ -610,7 +610,14 @@ class RidgeClassifierCV(BaseTransformer):
610
610
  ) -> List[str]:
611
611
  # in case the inferred output column names dimension is different
612
612
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
613
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
613
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
614
+
615
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
616
+ # seen during the fit.
617
+ snowpark_column_names = dataset.select(self.input_cols).columns
618
+ sample_pd_df.columns = snowpark_column_names
619
+
620
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
614
621
  output_df_columns = list(output_df_pd.columns)
615
622
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
616
623
  if self.sample_weight_col:
@@ -631,7 +631,14 @@ class RidgeCV(BaseTransformer):
631
631
  ) -> List[str]:
632
632
  # in case the inferred output column names dimension is different
633
633
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
634
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
634
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
635
+
636
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
637
+ # seen during the fit.
638
+ snowpark_column_names = dataset.select(self.input_cols).columns
639
+ sample_pd_df.columns = snowpark_column_names
640
+
641
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
635
642
  output_df_columns = list(output_df_pd.columns)
636
643
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
637
644
  if self.sample_weight_col:
@@ -750,7 +750,14 @@ class SGDClassifier(BaseTransformer):
750
750
  ) -> List[str]:
751
751
  # in case the inferred output column names dimension is different
752
752
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
753
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
753
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
754
+
755
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
756
+ # seen during the fit.
757
+ snowpark_column_names = dataset.select(self.input_cols).columns
758
+ sample_pd_df.columns = snowpark_column_names
759
+
760
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
754
761
  output_df_columns = list(output_df_pd.columns)
755
762
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
756
763
  if self.sample_weight_col:
@@ -650,7 +650,14 @@ class SGDOneClassSVM(BaseTransformer):
650
650
  ) -> List[str]:
651
651
  # in case the inferred output column names dimension is different
652
652
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
653
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
653
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
654
+
655
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
656
+ # seen during the fit.
657
+ snowpark_column_names = dataset.select(self.input_cols).columns
658
+ sample_pd_df.columns = snowpark_column_names
659
+
660
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
654
661
  output_df_columns = list(output_df_pd.columns)
655
662
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
656
663
  if self.sample_weight_col:
@@ -716,7 +716,14 @@ class SGDRegressor(BaseTransformer):
716
716
  ) -> List[str]:
717
717
  # in case the inferred output column names dimension is different
718
718
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
719
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
719
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
720
+
721
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
722
+ # seen during the fit.
723
+ snowpark_column_names = dataset.select(self.input_cols).columns
724
+ sample_pd_df.columns = snowpark_column_names
725
+
726
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
720
727
  output_df_columns = list(output_df_pd.columns)
721
728
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
722
729
  if self.sample_weight_col: