snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_sentiment.py +7 -4
- snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
- snowflake/ml/feature_store/access_manager.py +34 -30
- snowflake/ml/feature_store/feature_store.py +1 -1
- snowflake/ml/feature_store/feature_view.py +12 -11
- snowflake/ml/fileset/snowfs.py +2 -31
- snowflake/ml/model/_client/ops/model_ops.py +43 -0
- snowflake/ml/model/_client/sql/model_version.py +53 -1
- snowflake/ml/model/_model_composer/model_composer.py +6 -2
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +58 -139
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
- snowflake/ml/modeling/cluster/birch.py +8 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
- snowflake/ml/modeling/cluster/dbscan.py +8 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
- snowflake/ml/modeling/cluster/k_means.py +8 -1
- snowflake/ml/modeling/cluster/mean_shift.py +8 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
- snowflake/ml/modeling/cluster/optics.py +8 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
- snowflake/ml/modeling/compose/column_transformer.py +8 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
- snowflake/ml/modeling/covariance/oas.py +8 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
- snowflake/ml/modeling/decomposition/pca.py +8 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
- snowflake/ml/modeling/impute/knn_imputer.py +8 -1
- snowflake/ml/modeling/impute/missing_indicator.py +8 -1
- snowflake/ml/modeling/impute/simple_imputer.py +21 -2
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/lars.py +8 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/perceptron.py +8 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ridge.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
- snowflake/ml/modeling/manifold/isomap.py +8 -1
- snowflake/ml/modeling/manifold/mds.py +8 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
- snowflake/ml/modeling/manifold/tsne.py +8 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
- snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
- snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
- snowflake/ml/modeling/svm/linear_svc.py +8 -1
- snowflake/ml/modeling/svm/linear_svr.py +8 -1
- snowflake/ml/modeling/svm/nu_svc.py +8 -1
- snowflake/ml/modeling/svm/nu_svr.py +8 -1
- snowflake/ml/modeling/svm/svc.py +8 -1
- snowflake/ml/modeling/svm/svr.py +8 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
- snowflake/ml/registry/_manager/model_manager.py +59 -1
- snowflake/ml/registry/registry.py +10 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +13 -1
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +174 -172
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -652,7 +652,14 @@ class LassoCV(BaseTransformer):
|
|
652
652
|
) -> List[str]:
|
653
653
|
# in case the inferred output column names dimension is different
|
654
654
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
655
|
-
|
655
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
656
|
+
|
657
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
658
|
+
# seen during the fit.
|
659
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
660
|
+
sample_pd_df.columns = snowpark_column_names
|
661
|
+
|
662
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
656
663
|
output_df_columns = list(output_df_pd.columns)
|
657
664
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
658
665
|
if self.sample_weight_col:
|
@@ -644,7 +644,14 @@ class LassoLars(BaseTransformer):
|
|
644
644
|
) -> List[str]:
|
645
645
|
# in case the inferred output column names dimension is different
|
646
646
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
647
|
-
|
647
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
648
|
+
|
649
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
650
|
+
# seen during the fit.
|
651
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
652
|
+
sample_pd_df.columns = snowpark_column_names
|
653
|
+
|
654
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
648
655
|
output_df_columns = list(output_df_pd.columns)
|
649
656
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
650
657
|
if self.sample_weight_col:
|
@@ -645,7 +645,14 @@ class LassoLarsCV(BaseTransformer):
|
|
645
645
|
) -> List[str]:
|
646
646
|
# in case the inferred output column names dimension is different
|
647
647
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
648
|
-
|
648
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
649
|
+
|
650
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
651
|
+
# seen during the fit.
|
652
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
653
|
+
sample_pd_df.columns = snowpark_column_names
|
654
|
+
|
655
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
649
656
|
output_df_columns = list(output_df_pd.columns)
|
650
657
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
651
658
|
if self.sample_weight_col:
|
@@ -628,7 +628,14 @@ class LassoLarsIC(BaseTransformer):
|
|
628
628
|
) -> List[str]:
|
629
629
|
# in case the inferred output column names dimension is different
|
630
630
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
631
|
-
|
631
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
632
|
+
|
633
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
634
|
+
# seen during the fit.
|
635
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
636
|
+
sample_pd_df.columns = snowpark_column_names
|
637
|
+
|
638
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
632
639
|
output_df_columns = list(output_df_pd.columns)
|
633
640
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
634
641
|
if self.sample_weight_col:
|
@@ -581,7 +581,14 @@ class LinearRegression(BaseTransformer):
|
|
581
581
|
) -> List[str]:
|
582
582
|
# in case the inferred output column names dimension is different
|
583
583
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
584
|
-
|
584
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
585
|
+
|
586
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
587
|
+
# seen during the fit.
|
588
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
589
|
+
sample_pd_df.columns = snowpark_column_names
|
590
|
+
|
591
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
585
592
|
output_df_columns = list(output_df_pd.columns)
|
586
593
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
587
594
|
if self.sample_weight_col:
|
@@ -695,7 +695,14 @@ class LogisticRegression(BaseTransformer):
|
|
695
695
|
) -> List[str]:
|
696
696
|
# in case the inferred output column names dimension is different
|
697
697
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
698
|
-
|
698
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
699
|
+
|
700
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
701
|
+
# seen during the fit.
|
702
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
703
|
+
sample_pd_df.columns = snowpark_column_names
|
704
|
+
|
705
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
699
706
|
output_df_columns = list(output_df_pd.columns)
|
700
707
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
701
708
|
if self.sample_weight_col:
|
@@ -716,7 +716,14 @@ class LogisticRegressionCV(BaseTransformer):
|
|
716
716
|
) -> List[str]:
|
717
717
|
# in case the inferred output column names dimension is different
|
718
718
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
719
|
-
|
719
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
720
|
+
|
721
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
722
|
+
# seen during the fit.
|
723
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
724
|
+
sample_pd_df.columns = snowpark_column_names
|
725
|
+
|
726
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
720
727
|
output_df_columns = list(output_df_pd.columns)
|
721
728
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
722
729
|
if self.sample_weight_col:
|
@@ -614,7 +614,14 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
614
614
|
) -> List[str]:
|
615
615
|
# in case the inferred output column names dimension is different
|
616
616
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
617
|
-
|
617
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
618
|
+
|
619
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
620
|
+
# seen during the fit.
|
621
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
622
|
+
sample_pd_df.columns = snowpark_column_names
|
623
|
+
|
624
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
618
625
|
output_df_columns = list(output_df_pd.columns)
|
619
626
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
620
627
|
if self.sample_weight_col:
|
@@ -655,7 +655,14 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
655
655
|
) -> List[str]:
|
656
656
|
# in case the inferred output column names dimension is different
|
657
657
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
658
|
-
|
658
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
659
|
+
|
660
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
661
|
+
# seen during the fit.
|
662
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
663
|
+
sample_pd_df.columns = snowpark_column_names
|
664
|
+
|
665
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
659
666
|
output_df_columns = list(output_df_pd.columns)
|
660
667
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
661
668
|
if self.sample_weight_col:
|
@@ -606,7 +606,14 @@ class MultiTaskLasso(BaseTransformer):
|
|
606
606
|
) -> List[str]:
|
607
607
|
# in case the inferred output column names dimension is different
|
608
608
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
609
|
-
|
609
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
610
|
+
|
611
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
612
|
+
# seen during the fit.
|
613
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
614
|
+
sample_pd_df.columns = snowpark_column_names
|
615
|
+
|
616
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
610
617
|
output_df_columns = list(output_df_pd.columns)
|
611
618
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
612
619
|
if self.sample_weight_col:
|
@@ -641,7 +641,14 @@ class MultiTaskLassoCV(BaseTransformer):
|
|
641
641
|
) -> List[str]:
|
642
642
|
# in case the inferred output column names dimension is different
|
643
643
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
644
|
-
|
644
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
645
|
+
|
646
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
647
|
+
# seen during the fit.
|
648
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
649
|
+
sample_pd_df.columns = snowpark_column_names
|
650
|
+
|
651
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
645
652
|
output_df_columns = list(output_df_pd.columns)
|
646
653
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
647
654
|
if self.sample_weight_col:
|
@@ -589,7 +589,14 @@ class OrthogonalMatchingPursuit(BaseTransformer):
|
|
589
589
|
) -> List[str]:
|
590
590
|
# in case the inferred output column names dimension is different
|
591
591
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
592
|
-
|
592
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
593
|
+
|
594
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
595
|
+
# seen during the fit.
|
596
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
597
|
+
sample_pd_df.columns = snowpark_column_names
|
598
|
+
|
599
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
593
600
|
output_df_columns = list(output_df_pd.columns)
|
594
601
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
595
602
|
if self.sample_weight_col:
|
@@ -663,7 +663,14 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
663
663
|
) -> List[str]:
|
664
664
|
# in case the inferred output column names dimension is different
|
665
665
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
666
|
-
|
666
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
667
|
+
|
668
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
669
|
+
# seen during the fit.
|
670
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
671
|
+
sample_pd_df.columns = snowpark_column_names
|
672
|
+
|
673
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
667
674
|
output_df_columns = list(output_df_pd.columns)
|
668
675
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
669
676
|
if self.sample_weight_col:
|
@@ -649,7 +649,14 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
649
649
|
) -> List[str]:
|
650
650
|
# in case the inferred output column names dimension is different
|
651
651
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
652
|
-
|
652
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
653
|
+
|
654
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
655
|
+
# seen during the fit.
|
656
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
657
|
+
sample_pd_df.columns = snowpark_column_names
|
658
|
+
|
659
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
653
660
|
output_df_columns = list(output_df_pd.columns)
|
654
661
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
655
662
|
if self.sample_weight_col:
|
@@ -662,7 +662,14 @@ class Perceptron(BaseTransformer):
|
|
662
662
|
) -> List[str]:
|
663
663
|
# in case the inferred output column names dimension is different
|
664
664
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
665
|
-
|
665
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
666
|
+
|
667
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
668
|
+
# seen during the fit.
|
669
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
670
|
+
sample_pd_df.columns = snowpark_column_names
|
671
|
+
|
672
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
666
673
|
output_df_columns = list(output_df_pd.columns)
|
667
674
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
668
675
|
if self.sample_weight_col:
|
@@ -611,7 +611,14 @@ class PoissonRegressor(BaseTransformer):
|
|
611
611
|
) -> List[str]:
|
612
612
|
# in case the inferred output column names dimension is different
|
613
613
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
614
|
-
|
614
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
615
|
+
|
616
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
617
|
+
# seen during the fit.
|
618
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
619
|
+
sample_pd_df.columns = snowpark_column_names
|
620
|
+
|
621
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
615
622
|
output_df_columns = list(output_df_pd.columns)
|
616
623
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
617
624
|
if self.sample_weight_col:
|
@@ -667,7 +667,14 @@ class RANSACRegressor(BaseTransformer):
|
|
667
667
|
) -> List[str]:
|
668
668
|
# in case the inferred output column names dimension is different
|
669
669
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
670
|
-
|
670
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
671
|
+
|
672
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
673
|
+
# seen during the fit.
|
674
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
675
|
+
sample_pd_df.columns = snowpark_column_names
|
676
|
+
|
677
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
671
678
|
output_df_columns = list(output_df_pd.columns)
|
672
679
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
673
680
|
if self.sample_weight_col:
|
@@ -659,7 +659,14 @@ class Ridge(BaseTransformer):
|
|
659
659
|
) -> List[str]:
|
660
660
|
# in case the inferred output column names dimension is different
|
661
661
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
662
|
-
|
662
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
663
|
+
|
664
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
665
|
+
# seen during the fit.
|
666
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
667
|
+
sample_pd_df.columns = snowpark_column_names
|
668
|
+
|
669
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
663
670
|
output_df_columns = list(output_df_pd.columns)
|
664
671
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
665
672
|
if self.sample_weight_col:
|
@@ -659,7 +659,14 @@ class RidgeClassifier(BaseTransformer):
|
|
659
659
|
) -> List[str]:
|
660
660
|
# in case the inferred output column names dimension is different
|
661
661
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
662
|
-
|
662
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
663
|
+
|
664
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
665
|
+
# seen during the fit.
|
666
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
667
|
+
sample_pd_df.columns = snowpark_column_names
|
668
|
+
|
669
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
663
670
|
output_df_columns = list(output_df_pd.columns)
|
664
671
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
665
672
|
if self.sample_weight_col:
|
@@ -610,7 +610,14 @@ class RidgeClassifierCV(BaseTransformer):
|
|
610
610
|
) -> List[str]:
|
611
611
|
# in case the inferred output column names dimension is different
|
612
612
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
613
|
-
|
613
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
614
|
+
|
615
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
616
|
+
# seen during the fit.
|
617
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
618
|
+
sample_pd_df.columns = snowpark_column_names
|
619
|
+
|
620
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
614
621
|
output_df_columns = list(output_df_pd.columns)
|
615
622
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
616
623
|
if self.sample_weight_col:
|
@@ -631,7 +631,14 @@ class RidgeCV(BaseTransformer):
|
|
631
631
|
) -> List[str]:
|
632
632
|
# in case the inferred output column names dimension is different
|
633
633
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
634
|
-
|
634
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
635
|
+
|
636
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
637
|
+
# seen during the fit.
|
638
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
639
|
+
sample_pd_df.columns = snowpark_column_names
|
640
|
+
|
641
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
635
642
|
output_df_columns = list(output_df_pd.columns)
|
636
643
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
637
644
|
if self.sample_weight_col:
|
@@ -750,7 +750,14 @@ class SGDClassifier(BaseTransformer):
|
|
750
750
|
) -> List[str]:
|
751
751
|
# in case the inferred output column names dimension is different
|
752
752
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
753
|
-
|
753
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
754
|
+
|
755
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
756
|
+
# seen during the fit.
|
757
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
758
|
+
sample_pd_df.columns = snowpark_column_names
|
759
|
+
|
760
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
754
761
|
output_df_columns = list(output_df_pd.columns)
|
755
762
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
756
763
|
if self.sample_weight_col:
|
@@ -650,7 +650,14 @@ class SGDOneClassSVM(BaseTransformer):
|
|
650
650
|
) -> List[str]:
|
651
651
|
# in case the inferred output column names dimension is different
|
652
652
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
653
|
-
|
653
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
654
|
+
|
655
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
656
|
+
# seen during the fit.
|
657
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
658
|
+
sample_pd_df.columns = snowpark_column_names
|
659
|
+
|
660
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
654
661
|
output_df_columns = list(output_df_pd.columns)
|
655
662
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
656
663
|
if self.sample_weight_col:
|
@@ -716,7 +716,14 @@ class SGDRegressor(BaseTransformer):
|
|
716
716
|
) -> List[str]:
|
717
717
|
# in case the inferred output column names dimension is different
|
718
718
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
719
|
-
|
719
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
720
|
+
|
721
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
722
|
+
# seen during the fit.
|
723
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
724
|
+
sample_pd_df.columns = snowpark_column_names
|
725
|
+
|
726
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
720
727
|
output_df_columns = list(output_df_pd.columns)
|
721
728
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
722
729
|
if self.sample_weight_col:
|
@@ -618,7 +618,14 @@ class TheilSenRegressor(BaseTransformer):
|
|
618
618
|
) -> List[str]:
|
619
619
|
# in case the inferred output column names dimension is different
|
620
620
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
621
|
-
|
621
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
622
|
+
|
623
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
624
|
+
# seen during the fit.
|
625
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
626
|
+
sample_pd_df.columns = snowpark_column_names
|
627
|
+
|
628
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
622
629
|
output_df_columns = list(output_df_pd.columns)
|
623
630
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
624
631
|
if self.sample_weight_col:
|
@@ -644,7 +644,14 @@ class TweedieRegressor(BaseTransformer):
|
|
644
644
|
) -> List[str]:
|
645
645
|
# in case the inferred output column names dimension is different
|
646
646
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
647
|
-
|
647
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
648
|
+
|
649
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
650
|
+
# seen during the fit.
|
651
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
652
|
+
sample_pd_df.columns = snowpark_column_names
|
653
|
+
|
654
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
648
655
|
output_df_columns = list(output_df_pd.columns)
|
649
656
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
650
657
|
if self.sample_weight_col:
|
@@ -642,7 +642,14 @@ class Isomap(BaseTransformer):
|
|
642
642
|
) -> List[str]:
|
643
643
|
# in case the inferred output column names dimension is different
|
644
644
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
645
|
-
|
645
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
646
|
+
|
647
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
648
|
+
# seen during the fit.
|
649
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
650
|
+
sample_pd_df.columns = snowpark_column_names
|
651
|
+
|
652
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
646
653
|
output_df_columns = list(output_df_pd.columns)
|
647
654
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
648
655
|
if self.sample_weight_col:
|
@@ -623,7 +623,14 @@ class MDS(BaseTransformer):
|
|
623
623
|
) -> List[str]:
|
624
624
|
# in case the inferred output column names dimension is different
|
625
625
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
626
|
-
|
626
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
627
|
+
|
628
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
629
|
+
# seen during the fit.
|
630
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
631
|
+
sample_pd_df.columns = snowpark_column_names
|
632
|
+
|
633
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
627
634
|
output_df_columns = list(output_df_pd.columns)
|
628
635
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
629
636
|
if self.sample_weight_col:
|
@@ -625,7 +625,14 @@ class SpectralEmbedding(BaseTransformer):
|
|
625
625
|
) -> List[str]:
|
626
626
|
# in case the inferred output column names dimension is different
|
627
627
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
628
|
-
|
628
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
629
|
+
|
630
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
631
|
+
# seen during the fit.
|
632
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
633
|
+
sample_pd_df.columns = snowpark_column_names
|
634
|
+
|
635
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
629
636
|
output_df_columns = list(output_df_pd.columns)
|
630
637
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
631
638
|
if self.sample_weight_col:
|
@@ -684,7 +684,14 @@ class TSNE(BaseTransformer):
|
|
684
684
|
) -> List[str]:
|
685
685
|
# in case the inferred output column names dimension is different
|
686
686
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
687
|
-
|
687
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
688
|
+
|
689
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
690
|
+
# seen during the fit.
|
691
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
692
|
+
sample_pd_df.columns = snowpark_column_names
|
693
|
+
|
694
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
688
695
|
output_df_columns = list(output_df_pd.columns)
|
689
696
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
690
697
|
if self.sample_weight_col:
|
@@ -689,7 +689,14 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
689
689
|
) -> List[str]:
|
690
690
|
# in case the inferred output column names dimension is different
|
691
691
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
692
|
-
|
692
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
693
|
+
|
694
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
695
|
+
# seen during the fit.
|
696
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
697
|
+
sample_pd_df.columns = snowpark_column_names
|
698
|
+
|
699
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
693
700
|
output_df_columns = list(output_df_pd.columns)
|
694
701
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
695
702
|
if self.sample_weight_col:
|
@@ -662,7 +662,14 @@ class GaussianMixture(BaseTransformer):
|
|
662
662
|
) -> List[str]:
|
663
663
|
# in case the inferred output column names dimension is different
|
664
664
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
665
|
-
|
665
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
666
|
+
|
667
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
668
|
+
# seen during the fit.
|
669
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
670
|
+
sample_pd_df.columns = snowpark_column_names
|
671
|
+
|
672
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
666
673
|
output_df_columns = list(output_df_pd.columns)
|
667
674
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
668
675
|
if self.sample_weight_col:
|
@@ -572,7 +572,14 @@ class OneVsOneClassifier(BaseTransformer):
|
|
572
572
|
) -> List[str]:
|
573
573
|
# in case the inferred output column names dimension is different
|
574
574
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
575
|
-
|
575
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
576
|
+
|
577
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
578
|
+
# seen during the fit.
|
579
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
580
|
+
sample_pd_df.columns = snowpark_column_names
|
581
|
+
|
582
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
576
583
|
output_df_columns = list(output_df_pd.columns)
|
577
584
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
578
585
|
if self.sample_weight_col:
|
@@ -581,7 +581,14 @@ class OneVsRestClassifier(BaseTransformer):
|
|
581
581
|
) -> List[str]:
|
582
582
|
# in case the inferred output column names dimension is different
|
583
583
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
584
|
-
|
584
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
585
|
+
|
586
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
587
|
+
# seen during the fit.
|
588
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
589
|
+
sample_pd_df.columns = snowpark_column_names
|
590
|
+
|
591
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
585
592
|
output_df_columns = list(output_df_pd.columns)
|
586
593
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
587
594
|
if self.sample_weight_col:
|
@@ -584,7 +584,14 @@ class OutputCodeClassifier(BaseTransformer):
|
|
584
584
|
) -> List[str]:
|
585
585
|
# in case the inferred output column names dimension is different
|
586
586
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
587
|
-
|
587
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
588
|
+
|
589
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
590
|
+
# seen during the fit.
|
591
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
592
|
+
sample_pd_df.columns = snowpark_column_names
|
593
|
+
|
594
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
588
595
|
output_df_columns = list(output_df_pd.columns)
|
589
596
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
590
597
|
if self.sample_weight_col:
|
@@ -584,7 +584,14 @@ class BernoulliNB(BaseTransformer):
|
|
584
584
|
) -> List[str]:
|
585
585
|
# in case the inferred output column names dimension is different
|
586
586
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
587
|
-
|
587
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
588
|
+
|
589
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
590
|
+
# seen during the fit.
|
591
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
592
|
+
sample_pd_df.columns = snowpark_column_names
|
593
|
+
|
594
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
588
595
|
output_df_columns = list(output_df_pd.columns)
|
589
596
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
590
597
|
if self.sample_weight_col:
|