snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.2__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (174)
  1. snowflake/cortex/_sentiment.py +7 -4
  2. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  3. snowflake/ml/feature_store/access_manager.py +34 -30
  4. snowflake/ml/feature_store/feature_store.py +1 -1
  5. snowflake/ml/feature_store/feature_view.py +12 -11
  6. snowflake/ml/fileset/snowfs.py +2 -31
  7. snowflake/ml/model/_client/ops/model_ops.py +43 -0
  8. snowflake/ml/model/_client/sql/model_version.py +53 -1
  9. snowflake/ml/model/_model_composer/model_composer.py +6 -2
  10. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  11. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  12. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +58 -139
  13. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
  14. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
  15. snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
  16. snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
  17. snowflake/ml/modeling/cluster/birch.py +8 -1
  18. snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
  19. snowflake/ml/modeling/cluster/dbscan.py +8 -1
  20. snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
  21. snowflake/ml/modeling/cluster/k_means.py +8 -1
  22. snowflake/ml/modeling/cluster/mean_shift.py +8 -1
  23. snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
  24. snowflake/ml/modeling/cluster/optics.py +8 -1
  25. snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
  26. snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
  27. snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
  28. snowflake/ml/modeling/compose/column_transformer.py +8 -1
  29. snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
  30. snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
  31. snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
  32. snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
  33. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
  34. snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
  35. snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
  36. snowflake/ml/modeling/covariance/oas.py +8 -1
  37. snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
  38. snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
  39. snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
  40. snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
  41. snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
  42. snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
  43. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
  44. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
  45. snowflake/ml/modeling/decomposition/pca.py +8 -1
  46. snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
  47. snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
  48. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
  49. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
  50. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
  51. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
  52. snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
  53. snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
  54. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
  55. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
  56. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
  57. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
  58. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
  59. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
  60. snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
  61. snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
  62. snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
  63. snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
  64. snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
  65. snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
  66. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
  67. snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
  68. snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
  69. snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
  70. snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
  71. snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
  72. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
  73. snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
  74. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
  75. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
  76. snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
  77. snowflake/ml/modeling/impute/knn_imputer.py +8 -1
  78. snowflake/ml/modeling/impute/missing_indicator.py +8 -1
  79. snowflake/ml/modeling/impute/simple_imputer.py +21 -2
  80. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
  81. snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
  82. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
  83. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
  84. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
  85. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
  86. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
  87. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
  88. snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
  89. snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
  90. snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
  91. snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
  92. snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
  93. snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
  94. snowflake/ml/modeling/linear_model/lars.py +8 -1
  95. snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
  96. snowflake/ml/modeling/linear_model/lasso.py +8 -1
  97. snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
  98. snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
  99. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
  100. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
  101. snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
  102. snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
  103. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
  104. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
  105. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
  106. snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
  107. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
  108. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
  109. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
  110. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
  111. snowflake/ml/modeling/linear_model/perceptron.py +8 -1
  112. snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
  113. snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
  114. snowflake/ml/modeling/linear_model/ridge.py +8 -1
  115. snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
  116. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
  117. snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
  118. snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
  119. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
  120. snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
  121. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
  122. snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
  123. snowflake/ml/modeling/manifold/isomap.py +8 -1
  124. snowflake/ml/modeling/manifold/mds.py +8 -1
  125. snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
  126. snowflake/ml/modeling/manifold/tsne.py +8 -1
  127. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
  128. snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
  129. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
  130. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
  131. snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
  132. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
  133. snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
  134. snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
  135. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
  136. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
  137. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
  138. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
  139. snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
  140. snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
  141. snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
  142. snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
  143. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
  144. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
  145. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
  146. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
  147. snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
  148. snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
  149. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  150. snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
  151. snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
  152. snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
  153. snowflake/ml/modeling/svm/linear_svc.py +8 -1
  154. snowflake/ml/modeling/svm/linear_svr.py +8 -1
  155. snowflake/ml/modeling/svm/nu_svc.py +8 -1
  156. snowflake/ml/modeling/svm/nu_svr.py +8 -1
  157. snowflake/ml/modeling/svm/svc.py +8 -1
  158. snowflake/ml/modeling/svm/svr.py +8 -1
  159. snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
  160. snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
  161. snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
  162. snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
  163. snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
  164. snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
  165. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
  166. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
  167. snowflake/ml/registry/_manager/model_manager.py +59 -1
  168. snowflake/ml/registry/registry.py +10 -1
  169. snowflake/ml/version.py +1 -1
  170. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +13 -1
  171. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +174 -172
  172. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
  173. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
  174. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -590,7 +590,14 @@ class CategoricalNB(BaseTransformer):
590
590
  ) -> List[str]:
591
591
  # in case the inferred output column names dimension is different
592
592
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
593
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
593
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
594
+
595
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
596
+ # seen during the fit.
597
+ snowpark_column_names = dataset.select(self.input_cols).columns
598
+ sample_pd_df.columns = snowpark_column_names
599
+
600
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
594
601
  output_df_columns = list(output_df_pd.columns)
595
602
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
596
603
  if self.sample_weight_col:
@@ -584,7 +584,14 @@ class ComplementNB(BaseTransformer):
584
584
  ) -> List[str]:
585
585
  # in case the inferred output column names dimension is different
586
586
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
587
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
587
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
588
+
589
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
590
+ # seen during the fit.
591
+ snowpark_column_names = dataset.select(self.input_cols).columns
592
+ sample_pd_df.columns = snowpark_column_names
593
+
594
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
588
595
  output_df_columns = list(output_df_pd.columns)
589
596
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
590
597
  if self.sample_weight_col:
@@ -565,7 +565,14 @@ class GaussianNB(BaseTransformer):
565
565
  ) -> List[str]:
566
566
  # in case the inferred output column names dimension is different
567
567
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
568
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
568
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
569
+
570
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
571
+ # seen during the fit.
572
+ snowpark_column_names = dataset.select(self.input_cols).columns
573
+ sample_pd_df.columns = snowpark_column_names
574
+
575
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
569
576
  output_df_columns = list(output_df_pd.columns)
570
577
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
571
578
  if self.sample_weight_col:
@@ -578,7 +578,14 @@ class MultinomialNB(BaseTransformer):
578
578
  ) -> List[str]:
579
579
  # in case the inferred output column names dimension is different
580
580
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
581
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
581
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
582
+
583
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
584
+ # seen during the fit.
585
+ snowpark_column_names = dataset.select(self.input_cols).columns
586
+ sample_pd_df.columns = snowpark_column_names
587
+
588
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
582
589
  output_df_columns = list(output_df_pd.columns)
583
590
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
584
591
  if self.sample_weight_col:
@@ -635,7 +635,14 @@ class KNeighborsClassifier(BaseTransformer):
635
635
  ) -> List[str]:
636
636
  # in case the inferred output column names dimension is different
637
637
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
638
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
638
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
639
+
640
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
641
+ # seen during the fit.
642
+ snowpark_column_names = dataset.select(self.input_cols).columns
643
+ sample_pd_df.columns = snowpark_column_names
644
+
645
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
639
646
  output_df_columns = list(output_df_pd.columns)
640
647
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
641
648
  if self.sample_weight_col:
@@ -637,7 +637,14 @@ class KNeighborsRegressor(BaseTransformer):
637
637
  ) -> List[str]:
638
638
  # in case the inferred output column names dimension is different
639
639
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
640
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
640
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
641
+
642
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
643
+ # seen during the fit.
644
+ snowpark_column_names = dataset.select(self.input_cols).columns
645
+ sample_pd_df.columns = snowpark_column_names
646
+
647
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
641
648
  output_df_columns = list(output_df_pd.columns)
642
649
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
643
650
  if self.sample_weight_col:
@@ -612,7 +612,14 @@ class KernelDensity(BaseTransformer):
612
612
  ) -> List[str]:
613
613
  # in case the inferred output column names dimension is different
614
614
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
615
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
615
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
616
+
617
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
618
+ # seen during the fit.
619
+ snowpark_column_names = dataset.select(self.input_cols).columns
620
+ sample_pd_df.columns = snowpark_column_names
621
+
622
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
616
623
  output_df_columns = list(output_df_pd.columns)
617
624
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
618
625
  if self.sample_weight_col:
@@ -644,7 +644,14 @@ class LocalOutlierFactor(BaseTransformer):
644
644
  ) -> List[str]:
645
645
  # in case the inferred output column names dimension is different
646
646
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
647
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
647
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
648
+
649
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
650
+ # seen during the fit.
651
+ snowpark_column_names = dataset.select(self.input_cols).columns
652
+ sample_pd_df.columns = snowpark_column_names
653
+
654
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
648
655
  output_df_columns = list(output_df_pd.columns)
649
656
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
650
657
  if self.sample_weight_col:
@@ -575,7 +575,14 @@ class NearestCentroid(BaseTransformer):
575
575
  ) -> List[str]:
576
576
  # in case the inferred output column names dimension is different
577
577
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
578
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
578
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
579
+
580
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
581
+ # seen during the fit.
582
+ snowpark_column_names = dataset.select(self.input_cols).columns
583
+ sample_pd_df.columns = snowpark_column_names
584
+
585
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
579
586
  output_df_columns = list(output_df_pd.columns)
580
587
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
581
588
  if self.sample_weight_col:
@@ -623,7 +623,14 @@ class NearestNeighbors(BaseTransformer):
623
623
  ) -> List[str]:
624
624
  # in case the inferred output column names dimension is different
625
625
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
626
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
626
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
627
+
628
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
629
+ # seen during the fit.
630
+ snowpark_column_names = dataset.select(self.input_cols).columns
631
+ sample_pd_df.columns = snowpark_column_names
632
+
633
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
627
634
  output_df_columns = list(output_df_pd.columns)
628
635
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
629
636
  if self.sample_weight_col:
@@ -648,7 +648,14 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
648
648
  ) -> List[str]:
649
649
  # in case the inferred output column names dimension is different
650
650
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
651
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
651
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
652
+
653
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
654
+ # seen during the fit.
655
+ snowpark_column_names = dataset.select(self.input_cols).columns
656
+ sample_pd_df.columns = snowpark_column_names
657
+
658
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
652
659
  output_df_columns = list(output_df_pd.columns)
653
660
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
654
661
  if self.sample_weight_col:
@@ -647,7 +647,14 @@ class RadiusNeighborsClassifier(BaseTransformer):
647
647
  ) -> List[str]:
648
648
  # in case the inferred output column names dimension is different
649
649
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
650
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
650
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
651
+
652
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
653
+ # seen during the fit.
654
+ snowpark_column_names = dataset.select(self.input_cols).columns
655
+ sample_pd_df.columns = snowpark_column_names
656
+
657
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
651
658
  output_df_columns = list(output_df_pd.columns)
652
659
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
653
660
  if self.sample_weight_col:
@@ -637,7 +637,14 @@ class RadiusNeighborsRegressor(BaseTransformer):
637
637
  ) -> List[str]:
638
638
  # in case the inferred output column names dimension is different
639
639
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
640
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
640
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
641
+
642
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
643
+ # seen during the fit.
644
+ snowpark_column_names = dataset.select(self.input_cols).columns
645
+ sample_pd_df.columns = snowpark_column_names
646
+
647
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
641
648
  output_df_columns = list(output_df_pd.columns)
642
649
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
643
650
  if self.sample_weight_col:
@@ -596,7 +596,14 @@ class BernoulliRBM(BaseTransformer):
596
596
  ) -> List[str]:
597
597
  # in case the inferred output column names dimension is different
598
598
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
599
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
599
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
600
+
601
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
602
+ # seen during the fit.
603
+ snowpark_column_names = dataset.select(self.input_cols).columns
604
+ sample_pd_df.columns = snowpark_column_names
605
+
606
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
600
607
  output_df_columns = list(output_df_pd.columns)
601
608
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
602
609
  if self.sample_weight_col:
@@ -749,7 +749,14 @@ class MLPClassifier(BaseTransformer):
749
749
  ) -> List[str]:
750
750
  # in case the inferred output column names dimension is different
751
751
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
752
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
752
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
753
+
754
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
755
+ # seen during the fit.
756
+ snowpark_column_names = dataset.select(self.input_cols).columns
757
+ sample_pd_df.columns = snowpark_column_names
758
+
759
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
753
760
  output_df_columns = list(output_df_pd.columns)
754
761
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
755
762
  if self.sample_weight_col:
@@ -745,7 +745,14 @@ class MLPRegressor(BaseTransformer):
745
745
  ) -> List[str]:
746
746
  # in case the inferred output column names dimension is different
747
747
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
748
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
748
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
749
+
750
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
751
+ # seen during the fit.
752
+ snowpark_column_names = dataset.select(self.input_cols).columns
753
+ sample_pd_df.columns = snowpark_column_names
754
+
755
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
749
756
  output_df_columns = list(output_df_pd.columns)
750
757
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
751
758
  if self.sample_weight_col:
@@ -0,0 +1,5 @@
1
+ """Enables the anonymous stored procedures for running modeling fit"""
2
+
3
+ from snowflake.ml.modeling._internal.snowpark_implementations import snowpark_trainer
4
+
5
+ snowpark_trainer._ENABLE_ANONYMOUS_SPROC = True
@@ -586,7 +586,14 @@ class PolynomialFeatures(BaseTransformer):
586
586
  ) -> List[str]:
587
587
  # in case the inferred output column names dimension is different
588
588
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
589
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
589
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
590
+
591
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
592
+ # seen during the fit.
593
+ snowpark_column_names = dataset.select(self.input_cols).columns
594
+ sample_pd_df.columns = snowpark_column_names
595
+
596
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
590
597
  output_df_columns = list(output_df_pd.columns)
591
598
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
592
599
  if self.sample_weight_col:
@@ -590,7 +590,14 @@ class LabelPropagation(BaseTransformer):
590
590
  ) -> List[str]:
591
591
  # in case the inferred output column names dimension is different
592
592
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
593
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
593
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
594
+
595
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
596
+ # seen during the fit.
597
+ snowpark_column_names = dataset.select(self.input_cols).columns
598
+ sample_pd_df.columns = snowpark_column_names
599
+
600
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
594
601
  output_df_columns = list(output_df_pd.columns)
595
602
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
596
603
  if self.sample_weight_col:
@@ -599,7 +599,14 @@ class LabelSpreading(BaseTransformer):
599
599
  ) -> List[str]:
600
600
  # in case the inferred output column names dimension is different
601
601
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
602
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
602
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
603
+
604
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
605
+ # seen during the fit.
606
+ snowpark_column_names = dataset.select(self.input_cols).columns
607
+ sample_pd_df.columns = snowpark_column_names
608
+
609
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
603
610
  output_df_columns = list(output_df_pd.columns)
604
611
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
605
612
  if self.sample_weight_col:
@@ -655,7 +655,14 @@ class LinearSVC(BaseTransformer):
655
655
  ) -> List[str]:
656
656
  # in case the inferred output column names dimension is different
657
657
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
658
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
658
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
659
+
660
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
661
+ # seen during the fit.
662
+ snowpark_column_names = dataset.select(self.input_cols).columns
663
+ sample_pd_df.columns = snowpark_column_names
664
+
665
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
659
666
  output_df_columns = list(output_df_pd.columns)
660
667
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
661
668
  if self.sample_weight_col:
@@ -627,7 +627,14 @@ class LinearSVR(BaseTransformer):
627
627
  ) -> List[str]:
628
628
  # in case the inferred output column names dimension is different
629
629
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
630
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
630
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
631
+
632
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
633
+ # seen during the fit.
634
+ snowpark_column_names = dataset.select(self.input_cols).columns
635
+ sample_pd_df.columns = snowpark_column_names
636
+
637
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
631
638
  output_df_columns = list(output_df_pd.columns)
632
639
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
633
640
  if self.sample_weight_col:
@@ -661,7 +661,14 @@ class NuSVC(BaseTransformer):
661
661
  ) -> List[str]:
662
662
  # in case the inferred output column names dimension is different
663
663
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
664
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
664
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
665
+
666
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
667
+ # seen during the fit.
668
+ snowpark_column_names = dataset.select(self.input_cols).columns
669
+ sample_pd_df.columns = snowpark_column_names
670
+
671
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
665
672
  output_df_columns = list(output_df_pd.columns)
666
673
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
667
674
  if self.sample_weight_col:
@@ -622,7 +622,14 @@ class NuSVR(BaseTransformer):
622
622
  ) -> List[str]:
623
623
  # in case the inferred output column names dimension is different
624
624
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
625
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
625
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
626
+
627
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
628
+ # seen during the fit.
629
+ snowpark_column_names = dataset.select(self.input_cols).columns
630
+ sample_pd_df.columns = snowpark_column_names
631
+
632
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
626
633
  output_df_columns = list(output_df_pd.columns)
627
634
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
628
635
  if self.sample_weight_col:
@@ -664,7 +664,14 @@ class SVC(BaseTransformer):
664
664
  ) -> List[str]:
665
665
  # in case the inferred output column names dimension is different
666
666
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
667
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
667
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
668
+
669
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
670
+ # seen during the fit.
671
+ snowpark_column_names = dataset.select(self.input_cols).columns
672
+ sample_pd_df.columns = snowpark_column_names
673
+
674
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
668
675
  output_df_columns = list(output_df_pd.columns)
669
676
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
670
677
  if self.sample_weight_col:
@@ -625,7 +625,14 @@ class SVR(BaseTransformer):
625
625
  ) -> List[str]:
626
626
  # in case the inferred output column names dimension is different
627
627
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
628
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
628
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
629
+
630
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
631
+ # seen during the fit.
632
+ snowpark_column_names = dataset.select(self.input_cols).columns
633
+ sample_pd_df.columns = snowpark_column_names
634
+
635
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
629
636
  output_df_columns = list(output_df_pd.columns)
630
637
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
631
638
  if self.sample_weight_col:
@@ -692,7 +692,14 @@ class DecisionTreeClassifier(BaseTransformer):
692
692
  ) -> List[str]:
693
693
  # in case the inferred output column names dimension is different
694
694
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
695
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
695
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
696
+
697
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
698
+ # seen during the fit.
699
+ snowpark_column_names = dataset.select(self.input_cols).columns
700
+ sample_pd_df.columns = snowpark_column_names
701
+
702
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
696
703
  output_df_columns = list(output_df_pd.columns)
697
704
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
698
705
  if self.sample_weight_col:
@@ -674,7 +674,14 @@ class DecisionTreeRegressor(BaseTransformer):
674
674
  ) -> List[str]:
675
675
  # in case the inferred output column names dimension is different
676
676
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
677
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
677
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
678
+
679
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
680
+ # seen during the fit.
681
+ snowpark_column_names = dataset.select(self.input_cols).columns
682
+ sample_pd_df.columns = snowpark_column_names
683
+
684
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
678
685
  output_df_columns = list(output_df_pd.columns)
679
686
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
680
687
  if self.sample_weight_col:
@@ -684,7 +684,14 @@ class ExtraTreeClassifier(BaseTransformer):
684
684
  ) -> List[str]:
685
685
  # in case the inferred output column names dimension is different
686
686
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
687
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
687
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
688
+
689
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
690
+ # seen during the fit.
691
+ snowpark_column_names = dataset.select(self.input_cols).columns
692
+ sample_pd_df.columns = snowpark_column_names
693
+
694
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
688
695
  output_df_columns = list(output_df_pd.columns)
689
696
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
690
697
  if self.sample_weight_col:
@@ -666,7 +666,14 @@ class ExtraTreeRegressor(BaseTransformer):
666
666
  ) -> List[str]:
667
667
  # in case the inferred output column names dimension is different
668
668
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
669
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
669
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
670
+
671
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
672
+ # seen during the fit.
673
+ snowpark_column_names = dataset.select(self.input_cols).columns
674
+ sample_pd_df.columns = snowpark_column_names
675
+
676
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
670
677
  output_df_columns = list(output_df_pd.columns)
671
678
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
672
679
  if self.sample_weight_col:
@@ -784,7 +784,14 @@ class XGBClassifier(BaseTransformer):
784
784
  ) -> List[str]:
785
785
  # in case the inferred output column names dimension is different
786
786
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
787
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
787
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
788
+
789
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
790
+ # seen during the fit.
791
+ snowpark_column_names = dataset.select(self.input_cols).columns
792
+ sample_pd_df.columns = snowpark_column_names
793
+
794
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
788
795
  output_df_columns = list(output_df_pd.columns)
789
796
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
790
797
  if self.sample_weight_col:
@@ -783,7 +783,14 @@ class XGBRegressor(BaseTransformer):
783
783
  ) -> List[str]:
784
784
  # in case the inferred output column names dimension is different
785
785
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
786
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
786
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
787
+
788
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
789
+ # seen during the fit.
790
+ snowpark_column_names = dataset.select(self.input_cols).columns
791
+ sample_pd_df.columns = snowpark_column_names
792
+
793
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
787
794
  output_df_columns = list(output_df_pd.columns)
788
795
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
789
796
  if self.sample_weight_col:
@@ -788,7 +788,14 @@ class XGBRFClassifier(BaseTransformer):
788
788
  ) -> List[str]:
789
789
  # in case the inferred output column names dimension is different
790
790
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
791
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
791
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
792
+
793
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
794
+ # seen during the fit.
795
+ snowpark_column_names = dataset.select(self.input_cols).columns
796
+ sample_pd_df.columns = snowpark_column_names
797
+
798
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
792
799
  output_df_columns = list(output_df_pd.columns)
793
800
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
794
801
  if self.sample_weight_col:
@@ -788,7 +788,14 @@ class XGBRFRegressor(BaseTransformer):
788
788
  ) -> List[str]:
789
789
  # in case the inferred output column names dimension is different
790
790
  # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
791
- output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
791
+ sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
792
+
793
+ # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
794
+ # seen during the fit.
795
+ snowpark_column_names = dataset.select(self.input_cols).columns
796
+ sample_pd_df.columns = snowpark_column_names
797
+
798
+ output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
792
799
  output_df_columns = list(output_df_pd.columns)
793
800
  output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
794
801
  if self.sample_weight_col: