snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. snowflake/cortex/_complete.py +26 -5
  2. snowflake/cortex/_sentiment.py +7 -4
  3. snowflake/cortex/_sse_client.py +81 -0
  4. snowflake/cortex/_util.py +105 -8
  5. snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
  6. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  7. snowflake/ml/dataset/dataset.py +15 -12
  8. snowflake/ml/dataset/dataset_factory.py +3 -4
  9. snowflake/ml/feature_store/access_manager.py +34 -30
  10. snowflake/ml/feature_store/feature_store.py +3 -3
  11. snowflake/ml/feature_store/feature_view.py +12 -11
  12. snowflake/ml/fileset/snowfs.py +2 -31
  13. snowflake/ml/model/_client/ops/model_ops.py +43 -0
  14. snowflake/ml/model/_client/sql/model_version.py +55 -3
  15. snowflake/ml/model/_model_composer/model_composer.py +7 -3
  16. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -1
  17. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  18. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  19. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  20. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  21. snowflake/ml/model/_signatures/builtins_handler.py +2 -1
  22. snowflake/ml/model/_signatures/core.py +13 -1
  23. snowflake/ml/model/_signatures/pandas_handler.py +2 -0
  24. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  25. snowflake/ml/model/model_signature.py +2 -0
  26. snowflake/ml/model/type_hints.py +1 -0
  27. snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
  28. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +196 -242
  29. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +161 -0
  30. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +38 -18
  31. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +82 -134
  32. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +21 -17
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -2
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +9 -2
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -2
  36. snowflake/ml/modeling/cluster/birch.py +9 -2
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -2
  38. snowflake/ml/modeling/cluster/dbscan.py +9 -2
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -2
  40. snowflake/ml/modeling/cluster/k_means.py +9 -2
  41. snowflake/ml/modeling/cluster/mean_shift.py +9 -2
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -2
  43. snowflake/ml/modeling/cluster/optics.py +9 -2
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -2
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +9 -2
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -2
  47. snowflake/ml/modeling/compose/column_transformer.py +9 -2
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -2
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -2
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +9 -2
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +9 -2
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -2
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -2
  54. snowflake/ml/modeling/covariance/min_cov_det.py +9 -2
  55. snowflake/ml/modeling/covariance/oas.py +9 -2
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -2
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -2
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +9 -2
  59. snowflake/ml/modeling/decomposition/fast_ica.py +9 -2
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +9 -2
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +9 -2
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -2
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -2
  64. snowflake/ml/modeling/decomposition/pca.py +9 -2
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +9 -2
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +9 -2
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -2
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -2
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -2
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -2
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -2
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -2
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -2
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -2
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -2
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -2
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -2
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -2
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +9 -2
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -2
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -2
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -2
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +9 -2
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +9 -2
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -2
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +9 -2
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +9 -2
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +9 -2
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +9 -2
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +9 -2
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -2
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -2
  93. snowflake/ml/modeling/framework/base.py +3 -8
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -2
  95. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -2
  96. snowflake/ml/modeling/impute/iterative_imputer.py +9 -2
  97. snowflake/ml/modeling/impute/knn_imputer.py +9 -2
  98. snowflake/ml/modeling/impute/missing_indicator.py +9 -2
  99. snowflake/ml/modeling/impute/simple_imputer.py +28 -5
  100. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -2
  101. snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -2
  102. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -2
  103. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -2
  104. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -2
  105. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -2
  106. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -2
  107. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -2
  108. snowflake/ml/modeling/linear_model/ard_regression.py +9 -2
  109. snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -2
  110. snowflake/ml/modeling/linear_model/elastic_net.py +9 -2
  111. snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -2
  112. snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -2
  113. snowflake/ml/modeling/linear_model/huber_regressor.py +9 -2
  114. snowflake/ml/modeling/linear_model/lars.py +9 -2
  115. snowflake/ml/modeling/linear_model/lars_cv.py +9 -2
  116. snowflake/ml/modeling/linear_model/lasso.py +9 -2
  117. snowflake/ml/modeling/linear_model/lasso_cv.py +9 -2
  118. snowflake/ml/modeling/linear_model/lasso_lars.py +9 -2
  119. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -2
  120. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -2
  121. snowflake/ml/modeling/linear_model/linear_regression.py +9 -2
  122. snowflake/ml/modeling/linear_model/logistic_regression.py +9 -2
  123. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -2
  124. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -2
  125. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -2
  126. snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -2
  127. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -2
  128. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -2
  129. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -2
  130. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -2
  131. snowflake/ml/modeling/linear_model/perceptron.py +9 -2
  132. snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -2
  133. snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -2
  134. snowflake/ml/modeling/linear_model/ridge.py +9 -2
  135. snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -2
  136. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -2
  137. snowflake/ml/modeling/linear_model/ridge_cv.py +9 -2
  138. snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -2
  139. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -2
  140. snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -2
  141. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -2
  142. snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -2
  143. snowflake/ml/modeling/manifold/isomap.py +9 -2
  144. snowflake/ml/modeling/manifold/mds.py +9 -2
  145. snowflake/ml/modeling/manifold/spectral_embedding.py +9 -2
  146. snowflake/ml/modeling/manifold/tsne.py +9 -2
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -2
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -2
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -2
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -2
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -2
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -2
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -2
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -2
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -2
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -2
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -2
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -2
  161. snowflake/ml/modeling/neighbors/kernel_density.py +9 -2
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -2
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -2
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -2
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -2
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -2
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -2
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -2
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -2
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -2
  171. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  172. snowflake/ml/modeling/pipeline/pipeline.py +5 -0
  173. snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
  174. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
  175. snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
  176. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
  177. snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
  178. snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
  179. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +10 -2
  180. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +8 -5
  181. snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -2
  182. snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
  183. snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
  184. snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -2
  185. snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -2
  186. snowflake/ml/modeling/svm/linear_svc.py +9 -2
  187. snowflake/ml/modeling/svm/linear_svr.py +9 -2
  188. snowflake/ml/modeling/svm/nu_svc.py +9 -2
  189. snowflake/ml/modeling/svm/nu_svr.py +9 -2
  190. snowflake/ml/modeling/svm/svc.py +9 -2
  191. snowflake/ml/modeling/svm/svr.py +9 -2
  192. snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -2
  193. snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -2
  194. snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -2
  195. snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -2
  196. snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -2
  197. snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -2
  198. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -2
  199. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -2
  200. snowflake/ml/registry/_manager/model_manager.py +59 -1
  201. snowflake/ml/registry/registry.py +10 -1
  202. snowflake/ml/version.py +1 -1
  203. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/METADATA +32 -4
  204. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/RECORD +207 -204
  205. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/LICENSE.txt +0 -0
  206. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/WHEEL +0 -0
  207. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/top_level.txt +0 -0
@@ -316,7 +316,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
316
316
  inspect.currentframe(), PassiveAggressiveRegressor.__class__.__name__
317
317
  ),
318
318
  api_calls=[Session.call],
319
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
319
+ custom_tags={"autogen": True} if self._autogenerated else None,
320
320
  )
321
321
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
322
322
  pd_df.columns = dataset.columns
@@ -649,7 +649,14 @@ class PassiveAggressiveRegressor(BaseTransformer):
649
649
  ) -> List[str]:
650
650
  # in case the inferred output column names dimension is different
651
651
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
652
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
652
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
653
+
654
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
655
+ # seen during the fit.
656
+ snowpark_column_names = dataset.select(self.input_cols).columns
657
+ sample_pd_df.columns = snowpark_column_names
658
+
659
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
653
660
  output_df_columns = list(output_df_pd.columns)
654
661
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
655
662
  if self.sample_weight_col:
@@ -329,7 +329,7 @@ class Perceptron(BaseTransformer):
329
329
  inspect.currentframe(), Perceptron.__class__.__name__
330
330
  ),
331
331
  api_calls=[Session.call],
332
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
332
+ custom_tags={"autogen": True} if self._autogenerated else None,
333
333
  )
334
334
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
335
335
  pd_df.columns = dataset.columns
@@ -662,7 +662,14 @@ class Perceptron(BaseTransformer):
662
662
  ) -> List[str]:
663
663
  # in case the inferred output column names dimension is different
664
664
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
665
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
665
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
666
+
667
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
668
+ # seen during the fit.
669
+ snowpark_column_names = dataset.select(self.input_cols).columns
670
+ sample_pd_df.columns = snowpark_column_names
671
+
672
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
666
673
  output_df_columns = list(output_df_pd.columns)
667
674
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
668
675
  if self.sample_weight_col:
@@ -278,7 +278,7 @@ class PoissonRegressor(BaseTransformer):
278
278
  inspect.currentframe(), PoissonRegressor.__class__.__name__
279
279
  ),
280
280
  api_calls=[Session.call],
281
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
281
+ custom_tags={"autogen": True} if self._autogenerated else None,
282
282
  )
283
283
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
284
284
  pd_df.columns = dataset.columns
@@ -611,7 +611,14 @@ class PoissonRegressor(BaseTransformer):
611
611
  ) -> List[str]:
612
612
  # in case the inferred output column names dimension is different
613
613
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
614
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
614
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
615
+
616
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
617
+ # seen during the fit.
618
+ snowpark_column_names = dataset.select(self.input_cols).columns
619
+ sample_pd_df.columns = snowpark_column_names
620
+
621
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
615
622
  output_df_columns = list(output_df_pd.columns)
616
623
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
617
624
  if self.sample_weight_col:
@@ -334,7 +334,7 @@ class RANSACRegressor(BaseTransformer):
334
334
  inspect.currentframe(), RANSACRegressor.__class__.__name__
335
335
  ),
336
336
  api_calls=[Session.call],
337
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
337
+ custom_tags={"autogen": True} if self._autogenerated else None,
338
338
  )
339
339
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
340
340
  pd_df.columns = dataset.columns
@@ -667,7 +667,14 @@ class RANSACRegressor(BaseTransformer):
667
667
  ) -> List[str]:
668
668
  # in case the inferred output column names dimension is different
669
669
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
670
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
670
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
671
+
672
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
673
+ # seen during the fit.
674
+ snowpark_column_names = dataset.select(self.input_cols).columns
675
+ sample_pd_df.columns = snowpark_column_names
676
+
677
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
671
678
  output_df_columns = list(output_df_pd.columns)
672
679
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
673
680
  if self.sample_weight_col:
@@ -326,7 +326,7 @@ class Ridge(BaseTransformer):
326
326
  inspect.currentframe(), Ridge.__class__.__name__
327
327
  ),
328
328
  api_calls=[Session.call],
329
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
329
+ custom_tags={"autogen": True} if self._autogenerated else None,
330
330
  )
331
331
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
332
332
  pd_df.columns = dataset.columns
@@ -659,7 +659,14 @@ class Ridge(BaseTransformer):
659
659
  ) -> List[str]:
660
660
  # in case the inferred output column names dimension is different
661
661
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
662
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
662
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
663
+
664
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
665
+ # seen during the fit.
666
+ snowpark_column_names = dataset.select(self.input_cols).columns
667
+ sample_pd_df.columns = snowpark_column_names
668
+
669
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
663
670
  output_df_columns = list(output_df_pd.columns)
664
671
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
665
672
  if self.sample_weight_col:
@@ -326,7 +326,7 @@ class RidgeClassifier(BaseTransformer):
326
326
  inspect.currentframe(), RidgeClassifier.__class__.__name__
327
327
  ),
328
328
  api_calls=[Session.call],
329
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
329
+ custom_tags={"autogen": True} if self._autogenerated else None,
330
330
  )
331
331
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
332
332
  pd_df.columns = dataset.columns
@@ -659,7 +659,14 @@ class RidgeClassifier(BaseTransformer):
659
659
  ) -> List[str]:
660
660
  # in case the inferred output column names dimension is different
661
661
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
662
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
662
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
663
+
664
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
665
+ # seen during the fit.
666
+ snowpark_column_names = dataset.select(self.input_cols).columns
667
+ sample_pd_df.columns = snowpark_column_names
668
+
669
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
663
670
  output_df_columns = list(output_df_pd.columns)
664
671
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
665
672
  if self.sample_weight_col:
@@ -277,7 +277,7 @@ class RidgeClassifierCV(BaseTransformer):
277
277
  inspect.currentframe(), RidgeClassifierCV.__class__.__name__
278
278
  ),
279
279
  api_calls=[Session.call],
280
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
280
+ custom_tags={"autogen": True} if self._autogenerated else None,
281
281
  )
282
282
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
283
283
  pd_df.columns = dataset.columns
@@ -610,7 +610,14 @@ class RidgeClassifierCV(BaseTransformer):
610
610
  ) -> List[str]:
611
611
  # in case the inferred output column names dimension is different
612
612
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
613
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
613
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
614
+
615
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
616
+ # seen during the fit.
617
+ snowpark_column_names = dataset.select(self.input_cols).columns
618
+ sample_pd_df.columns = snowpark_column_names
619
+
620
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
614
621
  output_df_columns = list(output_df_pd.columns)
615
622
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
616
623
  if self.sample_weight_col:
@@ -298,7 +298,7 @@ class RidgeCV(BaseTransformer):
298
298
  inspect.currentframe(), RidgeCV.__class__.__name__
299
299
  ),
300
300
  api_calls=[Session.call],
301
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
301
+ custom_tags={"autogen": True} if self._autogenerated else None,
302
302
  )
303
303
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
304
304
  pd_df.columns = dataset.columns
@@ -631,7 +631,14 @@ class RidgeCV(BaseTransformer):
631
631
  ) -> List[str]:
632
632
  # in case the inferred output column names dimension is different
633
633
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
634
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
634
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
635
+
636
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
637
+ # seen during the fit.
638
+ snowpark_column_names = dataset.select(self.input_cols).columns
639
+ sample_pd_df.columns = snowpark_column_names
640
+
641
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
635
642
  output_df_columns = list(output_df_pd.columns)
636
643
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
637
644
  if self.sample_weight_col:
@@ -417,7 +417,7 @@ class SGDClassifier(BaseTransformer):
417
417
  inspect.currentframe(), SGDClassifier.__class__.__name__
418
418
  ),
419
419
  api_calls=[Session.call],
420
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
420
+ custom_tags={"autogen": True} if self._autogenerated else None,
421
421
  )
422
422
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
423
423
  pd_df.columns = dataset.columns
@@ -750,7 +750,14 @@ class SGDClassifier(BaseTransformer):
750
750
  ) -> List[str]:
751
751
  # in case the inferred output column names dimension is different
752
752
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
753
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
753
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
754
+
755
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
756
+ # seen during the fit.
757
+ snowpark_column_names = dataset.select(self.input_cols).columns
758
+ sample_pd_df.columns = snowpark_column_names
759
+
760
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
754
761
  output_df_columns = list(output_df_pd.columns)
755
762
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
756
763
  if self.sample_weight_col:
@@ -315,7 +315,7 @@ class SGDOneClassSVM(BaseTransformer):
315
315
  inspect.currentframe(), SGDOneClassSVM.__class__.__name__
316
316
  ),
317
317
  api_calls=[Session.call],
318
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
318
+ custom_tags={"autogen": True} if self._autogenerated else None,
319
319
  )
320
320
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
321
321
  pd_df.columns = dataset.columns
@@ -650,7 +650,14 @@ class SGDOneClassSVM(BaseTransformer):
650
650
  ) -> List[str]:
651
651
  # in case the inferred output column names dimension is different
652
652
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
653
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
653
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
654
+
655
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
656
+ # seen during the fit.
657
+ snowpark_column_names = dataset.select(self.input_cols).columns
658
+ sample_pd_df.columns = snowpark_column_names
659
+
660
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
654
661
  output_df_columns = list(output_df_pd.columns)
655
662
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
656
663
  if self.sample_weight_col:
@@ -383,7 +383,7 @@ class SGDRegressor(BaseTransformer):
383
383
  inspect.currentframe(), SGDRegressor.__class__.__name__
384
384
  ),
385
385
  api_calls=[Session.call],
386
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
386
+ custom_tags={"autogen": True} if self._autogenerated else None,
387
387
  )
388
388
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
389
389
  pd_df.columns = dataset.columns
@@ -716,7 +716,14 @@ class SGDRegressor(BaseTransformer):
716
716
  ) -> List[str]:
717
717
  # in case the inferred output column names dimension is different
718
718
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
719
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
719
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
720
+
721
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
722
+ # seen during the fit.
723
+ snowpark_column_names = dataset.select(self.input_cols).columns
724
+ sample_pd_df.columns = snowpark_column_names
725
+
726
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
720
727
  output_df_columns = list(output_df_pd.columns)
721
728
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
722
729
  if self.sample_weight_col:
@@ -285,7 +285,7 @@ class TheilSenRegressor(BaseTransformer):
285
285
  inspect.currentframe(), TheilSenRegressor.__class__.__name__
286
286
  ),
287
287
  api_calls=[Session.call],
288
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
288
+ custom_tags={"autogen": True} if self._autogenerated else None,
289
289
  )
290
290
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
291
291
  pd_df.columns = dataset.columns
@@ -618,7 +618,14 @@ class TheilSenRegressor(BaseTransformer):
618
618
  ) -> List[str]:
619
619
  # in case the inferred output column names dimension is different
620
620
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
621
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
621
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
622
+
623
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
624
+ # seen during the fit.
625
+ snowpark_column_names = dataset.select(self.input_cols).columns
626
+ sample_pd_df.columns = snowpark_column_names
627
+
628
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
622
629
  output_df_columns = list(output_df_pd.columns)
623
630
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
624
631
  if self.sample_weight_col:
@@ -311,7 +311,7 @@ class TweedieRegressor(BaseTransformer):
311
311
  inspect.currentframe(), TweedieRegressor.__class__.__name__
312
312
  ),
313
313
  api_calls=[Session.call],
314
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
314
+ custom_tags={"autogen": True} if self._autogenerated else None,
315
315
  )
316
316
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
317
317
  pd_df.columns = dataset.columns
@@ -644,7 +644,14 @@ class TweedieRegressor(BaseTransformer):
644
644
  ) -> List[str]:
645
645
  # in case the inferred output column names dimension is different
646
646
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
647
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
647
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
648
+
649
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
650
+ # seen during the fit.
651
+ snowpark_column_names = dataset.select(self.input_cols).columns
652
+ sample_pd_df.columns = snowpark_column_names
653
+
654
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
648
655
  output_df_columns = list(output_df_pd.columns)
649
656
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
650
657
  if self.sample_weight_col:
@@ -307,7 +307,7 @@ class Isomap(BaseTransformer):
307
307
  inspect.currentframe(), Isomap.__class__.__name__
308
308
  ),
309
309
  api_calls=[Session.call],
310
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
310
+ custom_tags={"autogen": True} if self._autogenerated else None,
311
311
  )
312
312
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
313
313
  pd_df.columns = dataset.columns
@@ -642,7 +642,14 @@ class Isomap(BaseTransformer):
642
642
  ) -> List[str]:
643
643
  # in case the inferred output column names dimension is different
644
644
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
645
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
645
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
646
+
647
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
648
+ # seen during the fit.
649
+ snowpark_column_names = dataset.select(self.input_cols).columns
650
+ sample_pd_df.columns = snowpark_column_names
651
+
652
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
646
653
  output_df_columns = list(output_df_pd.columns)
647
654
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
648
655
  if self.sample_weight_col:
@@ -290,7 +290,7 @@ class MDS(BaseTransformer):
290
290
  inspect.currentframe(), MDS.__class__.__name__
291
291
  ),
292
292
  api_calls=[Session.call],
293
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
293
+ custom_tags={"autogen": True} if self._autogenerated else None,
294
294
  )
295
295
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
296
296
  pd_df.columns = dataset.columns
@@ -623,7 +623,14 @@ class MDS(BaseTransformer):
623
623
  ) -> List[str]:
624
624
  # in case the inferred output column names dimension is different
625
625
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
626
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
626
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
627
+
628
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
629
+ # seen during the fit.
630
+ snowpark_column_names = dataset.select(self.input_cols).columns
631
+ sample_pd_df.columns = snowpark_column_names
632
+
633
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
627
634
  output_df_columns = list(output_df_pd.columns)
628
635
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
629
636
  if self.sample_weight_col:
@@ -292,7 +292,7 @@ class SpectralEmbedding(BaseTransformer):
292
292
  inspect.currentframe(), SpectralEmbedding.__class__.__name__
293
293
  ),
294
294
  api_calls=[Session.call],
295
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
295
+ custom_tags={"autogen": True} if self._autogenerated else None,
296
296
  )
297
297
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
298
298
  pd_df.columns = dataset.columns
@@ -625,7 +625,14 @@ class SpectralEmbedding(BaseTransformer):
625
625
  ) -> List[str]:
626
626
  # in case the inferred output column names dimension is different
627
627
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
628
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
628
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
629
+
630
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
631
+ # seen during the fit.
632
+ snowpark_column_names = dataset.select(self.input_cols).columns
633
+ sample_pd_df.columns = snowpark_column_names
634
+
635
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
629
636
  output_df_columns = list(output_df_pd.columns)
630
637
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
631
638
  if self.sample_weight_col:
@@ -351,7 +351,7 @@ class TSNE(BaseTransformer):
351
351
  inspect.currentframe(), TSNE.__class__.__name__
352
352
  ),
353
353
  api_calls=[Session.call],
354
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
354
+ custom_tags={"autogen": True} if self._autogenerated else None,
355
355
  )
356
356
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
357
357
  pd_df.columns = dataset.columns
@@ -684,7 +684,14 @@ class TSNE(BaseTransformer):
684
684
  ) -> List[str]:
685
685
  # in case the inferred output column names dimension is different
686
686
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
687
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
687
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
688
+
689
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
690
+ # seen during the fit.
691
+ snowpark_column_names = dataset.select(self.input_cols).columns
692
+ sample_pd_df.columns = snowpark_column_names
693
+
694
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
688
695
  output_df_columns = list(output_df_pd.columns)
689
696
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
690
697
  if self.sample_weight_col:
@@ -354,7 +354,7 @@ class BayesianGaussianMixture(BaseTransformer):
354
354
  inspect.currentframe(), BayesianGaussianMixture.__class__.__name__
355
355
  ),
356
356
  api_calls=[Session.call],
357
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
357
+ custom_tags={"autogen": True} if self._autogenerated else None,
358
358
  )
359
359
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
360
360
  pd_df.columns = dataset.columns
@@ -689,7 +689,14 @@ class BayesianGaussianMixture(BaseTransformer):
689
689
  ) -> List[str]:
690
690
  # in case the inferred output column names dimension is different
691
691
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
692
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
692
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
693
+
694
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
695
+ # seen during the fit.
696
+ snowpark_column_names = dataset.select(self.input_cols).columns
697
+ sample_pd_df.columns = snowpark_column_names
698
+
699
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
693
700
  output_df_columns = list(output_df_pd.columns)
694
701
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
695
702
  if self.sample_weight_col:
@@ -327,7 +327,7 @@ class GaussianMixture(BaseTransformer):
327
327
  inspect.currentframe(), GaussianMixture.__class__.__name__
328
328
  ),
329
329
  api_calls=[Session.call],
330
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
330
+ custom_tags={"autogen": True} if self._autogenerated else None,
331
331
  )
332
332
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
333
333
  pd_df.columns = dataset.columns
@@ -662,7 +662,14 @@ class GaussianMixture(BaseTransformer):
662
662
  ) -> List[str]:
663
663
  # in case the inferred output column names dimension is different
664
664
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
665
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
665
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
666
+
667
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
668
+ # seen during the fit.
669
+ snowpark_column_names = dataset.select(self.input_cols).columns
670
+ sample_pd_df.columns = snowpark_column_names
671
+
672
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
666
673
  output_df_columns = list(output_df_pd.columns)
667
674
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
668
675
  if self.sample_weight_col:
@@ -285,11 +285,7 @@ class GridSearchCV(BaseTransformer):
285
285
  )
286
286
  return selected_cols
287
287
 
288
- @telemetry.send_api_usage_telemetry(
289
- project=_PROJECT,
290
- subproject=_SUBPROJECT,
291
- )
292
- def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
288
+ def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
293
289
  """Run fit with all sets of parameters
294
290
  For more details on this function, see [sklearn.model_selection.GridSearchCV.fit]
295
291
  (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV.fit)
@@ -298,11 +298,7 @@ class RandomizedSearchCV(BaseTransformer):
298
298
  )
299
299
  return selected_cols
300
300
 
301
- @telemetry.send_api_usage_telemetry(
302
- project=_PROJECT,
303
- subproject=_SUBPROJECT,
304
- )
305
- def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
301
+ def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
306
302
  """Run fit with all sets of parameters
307
303
  For more details on this function, see [sklearn.model_selection.RandomizedSearchCV.fit]
308
304
  (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV.fit)
@@ -239,7 +239,7 @@ class OneVsOneClassifier(BaseTransformer):
239
239
  inspect.currentframe(), OneVsOneClassifier.__class__.__name__
240
240
  ),
241
241
  api_calls=[Session.call],
242
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
242
+ custom_tags={"autogen": True} if self._autogenerated else None,
243
243
  )
244
244
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
245
245
  pd_df.columns = dataset.columns
@@ -572,7 +572,14 @@ class OneVsOneClassifier(BaseTransformer):
572
572
  ) -> List[str]:
573
573
  # in case the inferred output column names dimension is different
574
574
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
575
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
575
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
576
+
577
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
578
+ # seen during the fit.
579
+ snowpark_column_names = dataset.select(self.input_cols).columns
580
+ sample_pd_df.columns = snowpark_column_names
581
+
582
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
576
583
  output_df_columns = list(output_df_pd.columns)
577
584
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
578
585
  if self.sample_weight_col:
@@ -248,7 +248,7 @@ class OneVsRestClassifier(BaseTransformer):
248
248
  inspect.currentframe(), OneVsRestClassifier.__class__.__name__
249
249
  ),
250
250
  api_calls=[Session.call],
251
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
251
+ custom_tags={"autogen": True} if self._autogenerated else None,
252
252
  )
253
253
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
254
254
  pd_df.columns = dataset.columns
@@ -581,7 +581,14 @@ class OneVsRestClassifier(BaseTransformer):
581
581
  ) -> List[str]:
582
582
  # in case the inferred output column names dimension is different
583
583
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
584
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
584
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
585
+
586
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
587
+ # seen during the fit.
588
+ snowpark_column_names = dataset.select(self.input_cols).columns
589
+ sample_pd_df.columns = snowpark_column_names
590
+
591
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
585
592
  output_df_columns = list(output_df_pd.columns)
586
593
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
587
594
  if self.sample_weight_col:
@@ -251,7 +251,7 @@ class OutputCodeClassifier(BaseTransformer):
251
251
  inspect.currentframe(), OutputCodeClassifier.__class__.__name__
252
252
  ),
253
253
  api_calls=[Session.call],
254
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
254
+ custom_tags={"autogen": True} if self._autogenerated else None,
255
255
  )
256
256
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
257
257
  pd_df.columns = dataset.columns
@@ -584,7 +584,14 @@ class OutputCodeClassifier(BaseTransformer):
584
584
  ) -> List[str]:
585
585
  # in case the inferred output column names dimension is different
586
586
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
587
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
587
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
588
+
589
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
590
+ # seen during the fit.
591
+ snowpark_column_names = dataset.select(self.input_cols).columns
592
+ sample_pd_df.columns = snowpark_column_names
593
+
594
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
588
595
  output_df_columns = list(output_df_pd.columns)
589
596
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
590
597
  if self.sample_weight_col:
@@ -251,7 +251,7 @@ class BernoulliNB(BaseTransformer):
251
251
  inspect.currentframe(), BernoulliNB.__class__.__name__
252
252
  ),
253
253
  api_calls=[Session.call],
254
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
254
+ custom_tags={"autogen": True} if self._autogenerated else None,
255
255
  )
256
256
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
257
257
  pd_df.columns = dataset.columns
@@ -584,7 +584,14 @@ class BernoulliNB(BaseTransformer):
584
584
  ) -> List[str]:
585
585
  # in case the inferred output column names dimension is different
586
586
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
587
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
587
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
588
+
589
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
590
+ # seen during the fit.
591
+ snowpark_column_names = dataset.select(self.input_cols).columns
592
+ sample_pd_df.columns = snowpark_column_names
593
+
594
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
588
595
  output_df_columns = list(output_df_pd.columns)
589
596
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
590
597
  if self.sample_weight_col: