snowflake-ml-python: snowflake_ml_python-1.5.1-py3-none-any.whl → snowflake_ml_python-1.5.3-py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (207)
  1. snowflake/cortex/_complete.py +26 -5
  2. snowflake/cortex/_sentiment.py +7 -4
  3. snowflake/cortex/_sse_client.py +81 -0
  4. snowflake/cortex/_util.py +105 -8
  5. snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
  6. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  7. snowflake/ml/dataset/dataset.py +15 -12
  8. snowflake/ml/dataset/dataset_factory.py +3 -4
  9. snowflake/ml/feature_store/access_manager.py +34 -30
  10. snowflake/ml/feature_store/feature_store.py +3 -3
  11. snowflake/ml/feature_store/feature_view.py +12 -11
  12. snowflake/ml/fileset/snowfs.py +2 -31
  13. snowflake/ml/model/_client/ops/model_ops.py +43 -0
  14. snowflake/ml/model/_client/sql/model_version.py +55 -3
  15. snowflake/ml/model/_model_composer/model_composer.py +7 -3
  16. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -1
  17. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  18. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  19. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  20. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  21. snowflake/ml/model/_signatures/builtins_handler.py +2 -1
  22. snowflake/ml/model/_signatures/core.py +13 -1
  23. snowflake/ml/model/_signatures/pandas_handler.py +2 -0
  24. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  25. snowflake/ml/model/model_signature.py +2 -0
  26. snowflake/ml/model/type_hints.py +1 -0
  27. snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
  28. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +196 -242
  29. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +161 -0
  30. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +38 -18
  31. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +82 -134
  32. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +21 -17
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -2
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +9 -2
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -2
  36. snowflake/ml/modeling/cluster/birch.py +9 -2
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -2
  38. snowflake/ml/modeling/cluster/dbscan.py +9 -2
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -2
  40. snowflake/ml/modeling/cluster/k_means.py +9 -2
  41. snowflake/ml/modeling/cluster/mean_shift.py +9 -2
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -2
  43. snowflake/ml/modeling/cluster/optics.py +9 -2
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -2
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +9 -2
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -2
  47. snowflake/ml/modeling/compose/column_transformer.py +9 -2
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -2
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -2
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +9 -2
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +9 -2
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -2
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -2
  54. snowflake/ml/modeling/covariance/min_cov_det.py +9 -2
  55. snowflake/ml/modeling/covariance/oas.py +9 -2
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -2
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -2
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +9 -2
  59. snowflake/ml/modeling/decomposition/fast_ica.py +9 -2
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +9 -2
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +9 -2
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -2
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -2
  64. snowflake/ml/modeling/decomposition/pca.py +9 -2
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +9 -2
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +9 -2
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -2
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -2
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -2
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -2
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -2
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -2
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -2
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -2
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -2
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -2
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -2
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -2
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +9 -2
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -2
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -2
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -2
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +9 -2
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +9 -2
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -2
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +9 -2
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +9 -2
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +9 -2
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +9 -2
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +9 -2
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -2
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -2
  93. snowflake/ml/modeling/framework/base.py +3 -8
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -2
  95. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -2
  96. snowflake/ml/modeling/impute/iterative_imputer.py +9 -2
  97. snowflake/ml/modeling/impute/knn_imputer.py +9 -2
  98. snowflake/ml/modeling/impute/missing_indicator.py +9 -2
  99. snowflake/ml/modeling/impute/simple_imputer.py +28 -5
  100. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -2
  101. snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -2
  102. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -2
  103. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -2
  104. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -2
  105. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -2
  106. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -2
  107. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -2
  108. snowflake/ml/modeling/linear_model/ard_regression.py +9 -2
  109. snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -2
  110. snowflake/ml/modeling/linear_model/elastic_net.py +9 -2
  111. snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -2
  112. snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -2
  113. snowflake/ml/modeling/linear_model/huber_regressor.py +9 -2
  114. snowflake/ml/modeling/linear_model/lars.py +9 -2
  115. snowflake/ml/modeling/linear_model/lars_cv.py +9 -2
  116. snowflake/ml/modeling/linear_model/lasso.py +9 -2
  117. snowflake/ml/modeling/linear_model/lasso_cv.py +9 -2
  118. snowflake/ml/modeling/linear_model/lasso_lars.py +9 -2
  119. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -2
  120. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -2
  121. snowflake/ml/modeling/linear_model/linear_regression.py +9 -2
  122. snowflake/ml/modeling/linear_model/logistic_regression.py +9 -2
  123. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -2
  124. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -2
  125. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -2
  126. snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -2
  127. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -2
  128. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -2
  129. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -2
  130. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -2
  131. snowflake/ml/modeling/linear_model/perceptron.py +9 -2
  132. snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -2
  133. snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -2
  134. snowflake/ml/modeling/linear_model/ridge.py +9 -2
  135. snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -2
  136. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -2
  137. snowflake/ml/modeling/linear_model/ridge_cv.py +9 -2
  138. snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -2
  139. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -2
  140. snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -2
  141. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -2
  142. snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -2
  143. snowflake/ml/modeling/manifold/isomap.py +9 -2
  144. snowflake/ml/modeling/manifold/mds.py +9 -2
  145. snowflake/ml/modeling/manifold/spectral_embedding.py +9 -2
  146. snowflake/ml/modeling/manifold/tsne.py +9 -2
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -2
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -2
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -2
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -2
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -2
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -2
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -2
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -2
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -2
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -2
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -2
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -2
  161. snowflake/ml/modeling/neighbors/kernel_density.py +9 -2
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -2
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -2
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -2
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -2
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -2
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -2
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -2
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -2
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -2
  171. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  172. snowflake/ml/modeling/pipeline/pipeline.py +5 -0
  173. snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
  174. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
  175. snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
  176. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
  177. snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
  178. snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
  179. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +10 -2
  180. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +8 -5
  181. snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -2
  182. snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
  183. snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
  184. snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -2
  185. snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -2
  186. snowflake/ml/modeling/svm/linear_svc.py +9 -2
  187. snowflake/ml/modeling/svm/linear_svr.py +9 -2
  188. snowflake/ml/modeling/svm/nu_svc.py +9 -2
  189. snowflake/ml/modeling/svm/nu_svr.py +9 -2
  190. snowflake/ml/modeling/svm/svc.py +9 -2
  191. snowflake/ml/modeling/svm/svr.py +9 -2
  192. snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -2
  193. snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -2
  194. snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -2
  195. snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -2
  196. snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -2
  197. snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -2
  198. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -2
  199. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -2
  200. snowflake/ml/registry/_manager/model_manager.py +59 -1
  201. snowflake/ml/registry/registry.py +10 -1
  202. snowflake/ml/version.py +1 -1
  203. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/METADATA +32 -4
  204. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/RECORD +207 -204
  205. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/LICENSE.txt +0 -0
  206. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/WHEEL +0 -0
  207. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/top_level.txt +0 -0
@@ -262,7 +262,7 @@ class LGBMClassifier(BaseTransformer):
262
262
  inspect.currentframe(), LGBMClassifier.__class__.__name__
263
263
  ),
264
264
  api_calls=[Session.call],
265
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
265
+ custom_tags={"autogen": True} if self._autogenerated else None,
266
266
  )
267
267
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
268
268
  pd_df.columns = dataset.columns
@@ -595,7 +595,14 @@ class LGBMClassifier(BaseTransformer):
595
595
  ) -> List[str]:
596
596
  # in case the inferred output column names dimension is different
597
597
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
598
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
598
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
599
+
600
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
601
+ # seen during the fit.
602
+ snowpark_column_names = dataset.select(self.input_cols).columns
603
+ sample_pd_df.columns = snowpark_column_names
604
+
605
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
599
606
  output_df_columns = list(output_df_pd.columns)
600
607
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
601
608
  if self.sample_weight_col:
@@ -262,7 +262,7 @@ class LGBMRegressor(BaseTransformer):
262
262
  inspect.currentframe(), LGBMRegressor.__class__.__name__
263
263
  ),
264
264
  api_calls=[Session.call],
265
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
265
+ custom_tags={"autogen": True} if self._autogenerated else None,
266
266
  )
267
267
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
268
268
  pd_df.columns = dataset.columns
@@ -595,7 +595,14 @@ class LGBMRegressor(BaseTransformer):
595
595
  ) -> List[str]:
596
596
  # in case the inferred output column names dimension is different
597
597
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
598
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
598
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
599
+
600
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
601
+ # seen during the fit.
602
+ snowpark_column_names = dataset.select(self.input_cols).columns
603
+ sample_pd_df.columns = snowpark_column_names
604
+
605
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
599
606
  output_df_columns = list(output_df_pd.columns)
600
607
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
601
608
  if self.sample_weight_col:
@@ -287,7 +287,7 @@ class ARDRegression(BaseTransformer):
287
287
  inspect.currentframe(), ARDRegression.__class__.__name__
288
288
  ),
289
289
  api_calls=[Session.call],
290
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
290
+ custom_tags={"autogen": True} if self._autogenerated else None,
291
291
  )
292
292
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
293
293
  pd_df.columns = dataset.columns
@@ -620,7 +620,14 @@ class ARDRegression(BaseTransformer):
620
620
  ) -> List[str]:
621
621
  # in case the inferred output column names dimension is different
622
622
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
623
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
623
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
624
+
625
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
626
+ # seen during the fit.
627
+ snowpark_column_names = dataset.select(self.input_cols).columns
628
+ sample_pd_df.columns = snowpark_column_names
629
+
630
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
624
631
  output_df_columns = list(output_df_pd.columns)
625
632
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
626
633
  if self.sample_weight_col:
@@ -298,7 +298,7 @@ class BayesianRidge(BaseTransformer):
298
298
  inspect.currentframe(), BayesianRidge.__class__.__name__
299
299
  ),
300
300
  api_calls=[Session.call],
301
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
301
+ custom_tags={"autogen": True} if self._autogenerated else None,
302
302
  )
303
303
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
304
304
  pd_df.columns = dataset.columns
@@ -631,7 +631,14 @@ class BayesianRidge(BaseTransformer):
631
631
  ) -> List[str]:
632
632
  # in case the inferred output column names dimension is different
633
633
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
634
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
634
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
635
+
636
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
637
+ # seen during the fit.
638
+ snowpark_column_names = dataset.select(self.input_cols).columns
639
+ sample_pd_df.columns = snowpark_column_names
640
+
641
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
635
642
  output_df_columns = list(output_df_pd.columns)
636
643
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
637
644
  if self.sample_weight_col:
@@ -297,7 +297,7 @@ class ElasticNet(BaseTransformer):
297
297
  inspect.currentframe(), ElasticNet.__class__.__name__
298
298
  ),
299
299
  api_calls=[Session.call],
300
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
300
+ custom_tags={"autogen": True} if self._autogenerated else None,
301
301
  )
302
302
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
303
303
  pd_df.columns = dataset.columns
@@ -630,7 +630,14 @@ class ElasticNet(BaseTransformer):
630
630
  ) -> List[str]:
631
631
  # in case the inferred output column names dimension is different
632
632
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
633
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
633
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
634
+
635
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
636
+ # seen during the fit.
637
+ snowpark_column_names = dataset.select(self.input_cols).columns
638
+ sample_pd_df.columns = snowpark_column_names
639
+
640
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
634
641
  output_df_columns = list(output_df_pd.columns)
635
642
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
636
643
  if self.sample_weight_col:
@@ -333,7 +333,7 @@ class ElasticNetCV(BaseTransformer):
333
333
  inspect.currentframe(), ElasticNetCV.__class__.__name__
334
334
  ),
335
335
  api_calls=[Session.call],
336
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
336
+ custom_tags={"autogen": True} if self._autogenerated else None,
337
337
  )
338
338
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
339
339
  pd_df.columns = dataset.columns
@@ -666,7 +666,14 @@ class ElasticNetCV(BaseTransformer):
666
666
  ) -> List[str]:
667
667
  # in case the inferred output column names dimension is different
668
668
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
669
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
669
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
670
+
671
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
672
+ # seen during the fit.
673
+ snowpark_column_names = dataset.select(self.input_cols).columns
674
+ sample_pd_df.columns = snowpark_column_names
675
+
676
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
670
677
  output_df_columns = list(output_df_pd.columns)
671
678
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
672
679
  if self.sample_weight_col:
@@ -278,7 +278,7 @@ class GammaRegressor(BaseTransformer):
278
278
  inspect.currentframe(), GammaRegressor.__class__.__name__
279
279
  ),
280
280
  api_calls=[Session.call],
281
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
281
+ custom_tags={"autogen": True} if self._autogenerated else None,
282
282
  )
283
283
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
284
284
  pd_df.columns = dataset.columns
@@ -611,7 +611,14 @@ class GammaRegressor(BaseTransformer):
611
611
  ) -> List[str]:
612
612
  # in case the inferred output column names dimension is different
613
613
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
614
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
614
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
615
+
616
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
617
+ # seen during the fit.
618
+ snowpark_column_names = dataset.select(self.input_cols).columns
619
+ sample_pd_df.columns = snowpark_column_names
620
+
621
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
615
622
  output_df_columns = list(output_df_pd.columns)
616
623
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
617
624
  if self.sample_weight_col:
@@ -261,7 +261,7 @@ class HuberRegressor(BaseTransformer):
261
261
  inspect.currentframe(), HuberRegressor.__class__.__name__
262
262
  ),
263
263
  api_calls=[Session.call],
264
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
264
+ custom_tags={"autogen": True} if self._autogenerated else None,
265
265
  )
266
266
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
267
267
  pd_df.columns = dataset.columns
@@ -594,7 +594,14 @@ class HuberRegressor(BaseTransformer):
594
594
  ) -> List[str]:
595
595
  # in case the inferred output column names dimension is different
596
596
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
597
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
597
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
598
+
599
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
600
+ # seen during the fit.
601
+ snowpark_column_names = dataset.select(self.input_cols).columns
602
+ sample_pd_df.columns = snowpark_column_names
603
+
604
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
598
605
  output_df_columns = list(output_df_pd.columns)
599
606
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
600
607
  if self.sample_weight_col:
@@ -290,7 +290,7 @@ class Lars(BaseTransformer):
290
290
  inspect.currentframe(), Lars.__class__.__name__
291
291
  ),
292
292
  api_calls=[Session.call],
293
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
293
+ custom_tags={"autogen": True} if self._autogenerated else None,
294
294
  )
295
295
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
296
296
  pd_df.columns = dataset.columns
@@ -623,7 +623,14 @@ class Lars(BaseTransformer):
623
623
  ) -> List[str]:
624
624
  # in case the inferred output column names dimension is different
625
625
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
626
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
626
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
627
+
628
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
629
+ # seen during the fit.
630
+ snowpark_column_names = dataset.select(self.input_cols).columns
631
+ sample_pd_df.columns = snowpark_column_names
632
+
633
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
627
634
  output_df_columns = list(output_df_pd.columns)
628
635
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
629
636
  if self.sample_weight_col:
@@ -298,7 +298,7 @@ class LarsCV(BaseTransformer):
298
298
  inspect.currentframe(), LarsCV.__class__.__name__
299
299
  ),
300
300
  api_calls=[Session.call],
301
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
301
+ custom_tags={"autogen": True} if self._autogenerated else None,
302
302
  )
303
303
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
304
304
  pd_df.columns = dataset.columns
@@ -631,7 +631,14 @@ class LarsCV(BaseTransformer):
631
631
  ) -> List[str]:
632
632
  # in case the inferred output column names dimension is different
633
633
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
634
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
634
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
635
+
636
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
637
+ # seen during the fit.
638
+ snowpark_column_names = dataset.select(self.input_cols).columns
639
+ sample_pd_df.columns = snowpark_column_names
640
+
641
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
635
642
  output_df_columns = list(output_df_pd.columns)
636
643
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
637
644
  if self.sample_weight_col:
@@ -291,7 +291,7 @@ class Lasso(BaseTransformer):
291
291
  inspect.currentframe(), Lasso.__class__.__name__
292
292
  ),
293
293
  api_calls=[Session.call],
294
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
294
+ custom_tags={"autogen": True} if self._autogenerated else None,
295
295
  )
296
296
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
297
297
  pd_df.columns = dataset.columns
@@ -624,7 +624,14 @@ class Lasso(BaseTransformer):
624
624
  ) -> List[str]:
625
625
  # in case the inferred output column names dimension is different
626
626
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
627
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
627
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
628
+
629
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
630
+ # seen during the fit.
631
+ snowpark_column_names = dataset.select(self.input_cols).columns
632
+ sample_pd_df.columns = snowpark_column_names
633
+
634
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
628
635
  output_df_columns = list(output_df_pd.columns)
629
636
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
630
637
  if self.sample_weight_col:
@@ -319,7 +319,7 @@ class LassoCV(BaseTransformer):
319
319
  inspect.currentframe(), LassoCV.__class__.__name__
320
320
  ),
321
321
  api_calls=[Session.call],
322
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
322
+ custom_tags={"autogen": True} if self._autogenerated else None,
323
323
  )
324
324
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
325
325
  pd_df.columns = dataset.columns
@@ -652,7 +652,14 @@ class LassoCV(BaseTransformer):
652
652
  ) -> List[str]:
653
653
  # in case the inferred output column names dimension is different
654
654
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
655
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
655
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
656
+
657
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
658
+ # seen during the fit.
659
+ snowpark_column_names = dataset.select(self.input_cols).columns
660
+ sample_pd_df.columns = snowpark_column_names
661
+
662
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
656
663
  output_df_columns = list(output_df_pd.columns)
657
664
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
658
665
  if self.sample_weight_col:
@@ -311,7 +311,7 @@ class LassoLars(BaseTransformer):
311
311
  inspect.currentframe(), LassoLars.__class__.__name__
312
312
  ),
313
313
  api_calls=[Session.call],
314
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
314
+ custom_tags={"autogen": True} if self._autogenerated else None,
315
315
  )
316
316
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
317
317
  pd_df.columns = dataset.columns
@@ -644,7 +644,14 @@ class LassoLars(BaseTransformer):
644
644
  ) -> List[str]:
645
645
  # in case the inferred output column names dimension is different
646
646
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
647
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
647
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
648
+
649
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
650
+ # seen during the fit.
651
+ snowpark_column_names = dataset.select(self.input_cols).columns
652
+ sample_pd_df.columns = snowpark_column_names
653
+
654
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
648
655
  output_df_columns = list(output_df_pd.columns)
649
656
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
650
657
  if self.sample_weight_col:
@@ -312,7 +312,7 @@ class LassoLarsCV(BaseTransformer):
312
312
  inspect.currentframe(), LassoLarsCV.__class__.__name__
313
313
  ),
314
314
  api_calls=[Session.call],
315
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
315
+ custom_tags={"autogen": True} if self._autogenerated else None,
316
316
  )
317
317
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
318
318
  pd_df.columns = dataset.columns
@@ -645,7 +645,14 @@ class LassoLarsCV(BaseTransformer):
645
645
  ) -> List[str]:
646
646
  # in case the inferred output column names dimension is different
647
647
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
648
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
648
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
649
+
650
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
651
+ # seen during the fit.
652
+ snowpark_column_names = dataset.select(self.input_cols).columns
653
+ sample_pd_df.columns = snowpark_column_names
654
+
655
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
649
656
  output_df_columns = list(output_df_pd.columns)
650
657
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
651
658
  if self.sample_weight_col:
@@ -295,7 +295,7 @@ class LassoLarsIC(BaseTransformer):
295
295
  inspect.currentframe(), LassoLarsIC.__class__.__name__
296
296
  ),
297
297
  api_calls=[Session.call],
298
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
298
+ custom_tags={"autogen": True} if self._autogenerated else None,
299
299
  )
300
300
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
301
301
  pd_df.columns = dataset.columns
@@ -628,7 +628,14 @@ class LassoLarsIC(BaseTransformer):
628
628
  ) -> List[str]:
629
629
  # in case the inferred output column names dimension is different
630
630
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
631
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
631
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
632
+
633
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
634
+ # seen during the fit.
635
+ snowpark_column_names = dataset.select(self.input_cols).columns
636
+ sample_pd_df.columns = snowpark_column_names
637
+
638
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
632
639
  output_df_columns = list(output_df_pd.columns)
633
640
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
634
641
  if self.sample_weight_col:
@@ -248,7 +248,7 @@ class LinearRegression(BaseTransformer):
248
248
  inspect.currentframe(), LinearRegression.__class__.__name__
249
249
  ),
250
250
  api_calls=[Session.call],
251
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
251
+ custom_tags={"autogen": True} if self._autogenerated else None,
252
252
  )
253
253
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
254
254
  pd_df.columns = dataset.columns
@@ -581,7 +581,14 @@ class LinearRegression(BaseTransformer):
581
581
  ) -> List[str]:
582
582
  # in case the inferred output column names dimension is different
583
583
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
584
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
584
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
585
+
586
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
587
+ # seen during the fit.
588
+ snowpark_column_names = dataset.select(self.input_cols).columns
589
+ sample_pd_df.columns = snowpark_column_names
590
+
591
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
585
592
  output_df_columns = list(output_df_pd.columns)
586
593
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
587
594
  if self.sample_weight_col:
@@ -362,7 +362,7 @@ class LogisticRegression(BaseTransformer):
362
362
  inspect.currentframe(), LogisticRegression.__class__.__name__
363
363
  ),
364
364
  api_calls=[Session.call],
365
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
365
+ custom_tags={"autogen": True} if self._autogenerated else None,
366
366
  )
367
367
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
368
368
  pd_df.columns = dataset.columns
@@ -695,7 +695,14 @@ class LogisticRegression(BaseTransformer):
695
695
  ) -> List[str]:
696
696
  # in case the inferred output column names dimension is different
697
697
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
698
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
698
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
699
+
700
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
701
+ # seen during the fit.
702
+ snowpark_column_names = dataset.select(self.input_cols).columns
703
+ sample_pd_df.columns = snowpark_column_names
704
+
705
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
699
706
  output_df_columns = list(output_df_pd.columns)
700
707
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
701
708
  if self.sample_weight_col:
@@ -383,7 +383,7 @@ class LogisticRegressionCV(BaseTransformer):
383
383
  inspect.currentframe(), LogisticRegressionCV.__class__.__name__
384
384
  ),
385
385
  api_calls=[Session.call],
386
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
386
+ custom_tags={"autogen": True} if self._autogenerated else None,
387
387
  )
388
388
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
389
389
  pd_df.columns = dataset.columns
@@ -716,7 +716,14 @@ class LogisticRegressionCV(BaseTransformer):
716
716
  ) -> List[str]:
717
717
  # in case the inferred output column names dimension is different
718
718
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
719
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
719
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
720
+
721
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
722
+ # seen during the fit.
723
+ snowpark_column_names = dataset.select(self.input_cols).columns
724
+ sample_pd_df.columns = snowpark_column_names
725
+
726
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
720
727
  output_df_columns = list(output_df_pd.columns)
721
728
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
722
729
  if self.sample_weight_col:
@@ -281,7 +281,7 @@ class MultiTaskElasticNet(BaseTransformer):
281
281
  inspect.currentframe(), MultiTaskElasticNet.__class__.__name__
282
282
  ),
283
283
  api_calls=[Session.call],
284
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
284
+ custom_tags={"autogen": True} if self._autogenerated else None,
285
285
  )
286
286
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
287
287
  pd_df.columns = dataset.columns
@@ -614,7 +614,14 @@ class MultiTaskElasticNet(BaseTransformer):
614
614
  ) -> List[str]:
615
615
  # in case the inferred output column names dimension is different
616
616
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
617
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
617
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
618
+
619
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
620
+ # seen during the fit.
621
+ snowpark_column_names = dataset.select(self.input_cols).columns
622
+ sample_pd_df.columns = snowpark_column_names
623
+
624
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
618
625
  output_df_columns = list(output_df_pd.columns)
619
626
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
620
627
  if self.sample_weight_col:
@@ -322,7 +322,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
322
322
  inspect.currentframe(), MultiTaskElasticNetCV.__class__.__name__
323
323
  ),
324
324
  api_calls=[Session.call],
325
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
325
+ custom_tags={"autogen": True} if self._autogenerated else None,
326
326
  )
327
327
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
328
328
  pd_df.columns = dataset.columns
@@ -655,7 +655,14 @@ class MultiTaskElasticNetCV(BaseTransformer):
655
655
  ) -> List[str]:
656
656
  # in case the inferred output column names dimension is different
657
657
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
658
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
658
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
659
+
660
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
661
+ # seen during the fit.
662
+ snowpark_column_names = dataset.select(self.input_cols).columns
663
+ sample_pd_df.columns = snowpark_column_names
664
+
665
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
659
666
  output_df_columns = list(output_df_pd.columns)
660
667
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
661
668
  if self.sample_weight_col:
@@ -273,7 +273,7 @@ class MultiTaskLasso(BaseTransformer):
273
273
  inspect.currentframe(), MultiTaskLasso.__class__.__name__
274
274
  ),
275
275
  api_calls=[Session.call],
276
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
276
+ custom_tags={"autogen": True} if self._autogenerated else None,
277
277
  )
278
278
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
279
279
  pd_df.columns = dataset.columns
@@ -606,7 +606,14 @@ class MultiTaskLasso(BaseTransformer):
606
606
  ) -> List[str]:
607
607
  # in case the inferred output column names dimension is different
608
608
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
609
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
609
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
610
+
611
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
612
+ # seen during the fit.
613
+ snowpark_column_names = dataset.select(self.input_cols).columns
614
+ sample_pd_df.columns = snowpark_column_names
615
+
616
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
610
617
  output_df_columns = list(output_df_pd.columns)
611
618
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
612
619
  if self.sample_weight_col:
@@ -308,7 +308,7 @@ class MultiTaskLassoCV(BaseTransformer):
308
308
  inspect.currentframe(), MultiTaskLassoCV.__class__.__name__
309
309
  ),
310
310
  api_calls=[Session.call],
311
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
311
+ custom_tags={"autogen": True} if self._autogenerated else None,
312
312
  )
313
313
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
314
314
  pd_df.columns = dataset.columns
@@ -641,7 +641,14 @@ class MultiTaskLassoCV(BaseTransformer):
641
641
  ) -> List[str]:
642
642
  # in case the inferred output column names dimension is different
643
643
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
644
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
644
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
645
+
646
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
647
+ # seen during the fit.
648
+ snowpark_column_names = dataset.select(self.input_cols).columns
649
+ sample_pd_df.columns = snowpark_column_names
650
+
651
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
645
652
  output_df_columns = list(output_df_pd.columns)
646
653
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
647
654
  if self.sample_weight_col:
@@ -256,7 +256,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
256
256
  inspect.currentframe(), OrthogonalMatchingPursuit.__class__.__name__
257
257
  ),
258
258
  api_calls=[Session.call],
259
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
259
+ custom_tags={"autogen": True} if self._autogenerated else None,
260
260
  )
261
261
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
262
262
  pd_df.columns = dataset.columns
@@ -589,7 +589,14 @@ class OrthogonalMatchingPursuit(BaseTransformer):
589
589
  ) -> List[str]:
590
590
  # in case the inferred output column names dimension is different
591
591
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
592
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
592
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
593
+
594
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
595
+ # seen during the fit.
596
+ snowpark_column_names = dataset.select(self.input_cols).columns
597
+ sample_pd_df.columns = snowpark_column_names
598
+
599
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
593
600
  output_df_columns = list(output_df_pd.columns)
594
601
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
595
602
  if self.sample_weight_col:
@@ -330,7 +330,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
330
330
  inspect.currentframe(), PassiveAggressiveClassifier.__class__.__name__
331
331
  ),
332
332
  api_calls=[Session.call],
333
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
333
+ custom_tags={"autogen": True} if self._autogenerated else None,
334
334
  )
335
335
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
336
336
  pd_df.columns = dataset.columns
@@ -663,7 +663,14 @@ class PassiveAggressiveClassifier(BaseTransformer):
663
663
  ) -> List[str]:
664
664
  # in case the inferred output column names dimension is different
665
665
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
666
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
666
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
667
+
668
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
669
+ # seen during the fit.
670
+ snowpark_column_names = dataset.select(self.input_cols).columns
671
+ sample_pd_df.columns = snowpark_column_names
672
+
673
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
667
674
  output_df_columns = list(output_df_pd.columns)
668
675
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
669
676
  if self.sample_weight_col: