snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_sentiment.py +7 -4
- snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
- snowflake/ml/feature_store/access_manager.py +34 -30
- snowflake/ml/feature_store/feature_store.py +1 -1
- snowflake/ml/feature_store/feature_view.py +12 -11
- snowflake/ml/fileset/snowfs.py +2 -31
- snowflake/ml/model/_client/ops/model_ops.py +43 -0
- snowflake/ml/model/_client/sql/model_version.py +53 -1
- snowflake/ml/model/_model_composer/model_composer.py +6 -2
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +58 -139
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
- snowflake/ml/modeling/cluster/birch.py +8 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
- snowflake/ml/modeling/cluster/dbscan.py +8 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
- snowflake/ml/modeling/cluster/k_means.py +8 -1
- snowflake/ml/modeling/cluster/mean_shift.py +8 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
- snowflake/ml/modeling/cluster/optics.py +8 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
- snowflake/ml/modeling/compose/column_transformer.py +8 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
- snowflake/ml/modeling/covariance/oas.py +8 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
- snowflake/ml/modeling/decomposition/pca.py +8 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
- snowflake/ml/modeling/impute/knn_imputer.py +8 -1
- snowflake/ml/modeling/impute/missing_indicator.py +8 -1
- snowflake/ml/modeling/impute/simple_imputer.py +21 -2
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/lars.py +8 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/perceptron.py +8 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ridge.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
- snowflake/ml/modeling/manifold/isomap.py +8 -1
- snowflake/ml/modeling/manifold/mds.py +8 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
- snowflake/ml/modeling/manifold/tsne.py +8 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
- snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
- snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
- snowflake/ml/modeling/svm/linear_svc.py +8 -1
- snowflake/ml/modeling/svm/linear_svr.py +8 -1
- snowflake/ml/modeling/svm/nu_svc.py +8 -1
- snowflake/ml/modeling/svm/nu_svr.py +8 -1
- snowflake/ml/modeling/svm/svc.py +8 -1
- snowflake/ml/modeling/svm/svr.py +8 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
- snowflake/ml/registry/_manager/model_manager.py +59 -1
- snowflake/ml/registry/registry.py +10 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +13 -1
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +174 -172
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -600,7 +600,14 @@ class SpectralCoclustering(BaseTransformer):
|
|
600
600
|
) -> List[str]:
|
601
601
|
# in case the inferred output column names dimension is different
|
602
602
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
603
|
-
|
603
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
604
|
+
|
605
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
606
|
+
# seen during the fit.
|
607
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
608
|
+
sample_pd_df.columns = snowpark_column_names
|
609
|
+
|
610
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
604
611
|
output_df_columns = list(output_df_pd.columns)
|
605
612
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
606
613
|
if self.sample_weight_col:
|
@@ -634,7 +634,14 @@ class ColumnTransformer(BaseTransformer):
|
|
634
634
|
) -> List[str]:
|
635
635
|
# in case the inferred output column names dimension is different
|
636
636
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
637
|
-
|
637
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
638
|
+
|
639
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
640
|
+
# seen during the fit.
|
641
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
642
|
+
sample_pd_df.columns = snowpark_column_names
|
643
|
+
|
644
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
638
645
|
output_df_columns = list(output_df_pd.columns)
|
639
646
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
640
647
|
if self.sample_weight_col:
|
@@ -593,7 +593,14 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
593
593
|
) -> List[str]:
|
594
594
|
# in case the inferred output column names dimension is different
|
595
595
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
596
|
-
|
596
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
597
|
+
|
598
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
599
|
+
# seen during the fit.
|
600
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
601
|
+
sample_pd_df.columns = snowpark_column_names
|
602
|
+
|
603
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
597
604
|
output_df_columns = list(output_df_pd.columns)
|
598
605
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
599
606
|
if self.sample_weight_col:
|
@@ -590,7 +590,14 @@ class EllipticEnvelope(BaseTransformer):
|
|
590
590
|
) -> List[str]:
|
591
591
|
# in case the inferred output column names dimension is different
|
592
592
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
593
|
-
|
593
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
594
|
+
|
595
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
596
|
+
# seen during the fit.
|
597
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
598
|
+
sample_pd_df.columns = snowpark_column_names
|
599
|
+
|
600
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
594
601
|
output_df_columns = list(output_df_pd.columns)
|
595
602
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
596
603
|
if self.sample_weight_col:
|
@@ -562,7 +562,14 @@ class EmpiricalCovariance(BaseTransformer):
|
|
562
562
|
) -> List[str]:
|
563
563
|
# in case the inferred output column names dimension is different
|
564
564
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
565
|
-
|
565
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
566
|
+
|
567
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
568
|
+
# seen during the fit.
|
569
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
570
|
+
sample_pd_df.columns = snowpark_column_names
|
571
|
+
|
572
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
566
573
|
output_df_columns = list(output_df_pd.columns)
|
567
574
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
568
575
|
if self.sample_weight_col:
|
@@ -610,7 +610,14 @@ class GraphicalLasso(BaseTransformer):
|
|
610
610
|
) -> List[str]:
|
611
611
|
# in case the inferred output column names dimension is different
|
612
612
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
613
|
-
|
613
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
614
|
+
|
615
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
616
|
+
# seen during the fit.
|
617
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
618
|
+
sample_pd_df.columns = snowpark_column_names
|
619
|
+
|
620
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
614
621
|
output_df_columns = list(output_df_pd.columns)
|
615
622
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
616
623
|
if self.sample_weight_col:
|
@@ -636,7 +636,14 @@ class GraphicalLassoCV(BaseTransformer):
|
|
636
636
|
) -> List[str]:
|
637
637
|
# in case the inferred output column names dimension is different
|
638
638
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
639
|
-
|
639
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
640
|
+
|
641
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
642
|
+
# seen during the fit.
|
643
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
644
|
+
sample_pd_df.columns = snowpark_column_names
|
645
|
+
|
646
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
640
647
|
output_df_columns = list(output_df_pd.columns)
|
641
648
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
642
649
|
if self.sample_weight_col:
|
@@ -569,7 +569,14 @@ class LedoitWolf(BaseTransformer):
|
|
569
569
|
) -> List[str]:
|
570
570
|
# in case the inferred output column names dimension is different
|
571
571
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
572
|
-
|
572
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
573
|
+
|
574
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
575
|
+
# seen during the fit.
|
576
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
577
|
+
sample_pd_df.columns = snowpark_column_names
|
578
|
+
|
579
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
573
580
|
output_df_columns = list(output_df_pd.columns)
|
574
581
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
575
582
|
if self.sample_weight_col:
|
@@ -581,7 +581,14 @@ class MinCovDet(BaseTransformer):
|
|
581
581
|
) -> List[str]:
|
582
582
|
# in case the inferred output column names dimension is different
|
583
583
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
584
|
-
|
584
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
585
|
+
|
586
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
587
|
+
# seen during the fit.
|
588
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
589
|
+
sample_pd_df.columns = snowpark_column_names
|
590
|
+
|
591
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
585
592
|
output_df_columns = list(output_df_pd.columns)
|
586
593
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
587
594
|
if self.sample_weight_col:
|
@@ -562,7 +562,14 @@ class OAS(BaseTransformer):
|
|
562
562
|
) -> List[str]:
|
563
563
|
# in case the inferred output column names dimension is different
|
564
564
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
565
|
-
|
565
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
566
|
+
|
567
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
568
|
+
# seen during the fit.
|
569
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
570
|
+
sample_pd_df.columns = snowpark_column_names
|
571
|
+
|
572
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
566
573
|
output_df_columns = list(output_df_pd.columns)
|
567
574
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
568
575
|
if self.sample_weight_col:
|
@@ -568,7 +568,14 @@ class ShrunkCovariance(BaseTransformer):
|
|
568
568
|
) -> List[str]:
|
569
569
|
# in case the inferred output column names dimension is different
|
570
570
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
571
|
-
|
571
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
572
|
+
|
573
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
574
|
+
# seen during the fit.
|
575
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
576
|
+
sample_pd_df.columns = snowpark_column_names
|
577
|
+
|
578
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
572
579
|
output_df_columns = list(output_df_pd.columns)
|
573
580
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
574
581
|
if self.sample_weight_col:
|
@@ -678,7 +678,14 @@ class DictionaryLearning(BaseTransformer):
|
|
678
678
|
) -> List[str]:
|
679
679
|
# in case the inferred output column names dimension is different
|
680
680
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
681
|
-
|
681
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
682
|
+
|
683
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
684
|
+
# seen during the fit.
|
685
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
686
|
+
sample_pd_df.columns = snowpark_column_names
|
687
|
+
|
688
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
682
689
|
output_df_columns = list(output_df_pd.columns)
|
683
690
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
684
691
|
if self.sample_weight_col:
|
@@ -615,7 +615,14 @@ class FactorAnalysis(BaseTransformer):
|
|
615
615
|
) -> List[str]:
|
616
616
|
# in case the inferred output column names dimension is different
|
617
617
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
618
|
-
|
618
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
619
|
+
|
620
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
621
|
+
# seen during the fit.
|
622
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
623
|
+
sample_pd_df.columns = snowpark_column_names
|
624
|
+
|
625
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
619
626
|
output_df_columns = list(output_df_pd.columns)
|
620
627
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
621
628
|
if self.sample_weight_col:
|
@@ -633,7 +633,14 @@ class FastICA(BaseTransformer):
|
|
633
633
|
) -> List[str]:
|
634
634
|
# in case the inferred output column names dimension is different
|
635
635
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
636
|
-
|
636
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
637
|
+
|
638
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
639
|
+
# seen during the fit.
|
640
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
641
|
+
sample_pd_df.columns = snowpark_column_names
|
642
|
+
|
643
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
637
644
|
output_df_columns = list(output_df_pd.columns)
|
638
645
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
639
646
|
if self.sample_weight_col:
|
@@ -585,7 +585,14 @@ class IncrementalPCA(BaseTransformer):
|
|
585
585
|
) -> List[str]:
|
586
586
|
# in case the inferred output column names dimension is different
|
587
587
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
588
|
-
|
588
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
589
|
+
|
590
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
591
|
+
# seen during the fit.
|
592
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
593
|
+
sample_pd_df.columns = snowpark_column_names
|
594
|
+
|
595
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
589
596
|
output_df_columns = list(output_df_pd.columns)
|
590
597
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
591
598
|
if self.sample_weight_col:
|
@@ -681,7 +681,14 @@ class KernelPCA(BaseTransformer):
|
|
681
681
|
) -> List[str]:
|
682
682
|
# in case the inferred output column names dimension is different
|
683
683
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
684
|
-
|
684
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
685
|
+
|
686
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
687
|
+
# seen during the fit.
|
688
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
689
|
+
sample_pd_df.columns = snowpark_column_names
|
690
|
+
|
691
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
685
692
|
output_df_columns = list(output_df_pd.columns)
|
686
693
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
687
694
|
if self.sample_weight_col:
|
@@ -703,7 +703,14 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
703
703
|
) -> List[str]:
|
704
704
|
# in case the inferred output column names dimension is different
|
705
705
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
706
|
-
|
706
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
707
|
+
|
708
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
709
|
+
# seen during the fit.
|
710
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
711
|
+
sample_pd_df.columns = snowpark_column_names
|
712
|
+
|
713
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
707
714
|
output_df_columns = list(output_df_pd.columns)
|
708
715
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
709
716
|
if self.sample_weight_col:
|
@@ -648,7 +648,14 @@ class MiniBatchSparsePCA(BaseTransformer):
|
|
648
648
|
) -> List[str]:
|
649
649
|
# in case the inferred output column names dimension is different
|
650
650
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
651
|
-
|
651
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
652
|
+
|
653
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
654
|
+
# seen during the fit.
|
655
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
656
|
+
sample_pd_df.columns = snowpark_column_names
|
657
|
+
|
658
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
652
659
|
output_df_columns = list(output_df_pd.columns)
|
653
660
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
654
661
|
if self.sample_weight_col:
|
@@ -650,7 +650,14 @@ class PCA(BaseTransformer):
|
|
650
650
|
) -> List[str]:
|
651
651
|
# in case the inferred output column names dimension is different
|
652
652
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
653
|
-
|
653
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
654
|
+
|
655
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
656
|
+
# seen during the fit.
|
657
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
658
|
+
sample_pd_df.columns = snowpark_column_names
|
659
|
+
|
660
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
654
661
|
output_df_columns = list(output_df_pd.columns)
|
655
662
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
656
663
|
if self.sample_weight_col:
|
@@ -623,7 +623,14 @@ class SparsePCA(BaseTransformer):
|
|
623
623
|
) -> List[str]:
|
624
624
|
# in case the inferred output column names dimension is different
|
625
625
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
626
|
-
|
626
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
627
|
+
|
628
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
629
|
+
# seen during the fit.
|
630
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
631
|
+
sample_pd_df.columns = snowpark_column_names
|
632
|
+
|
633
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
627
634
|
output_df_columns = list(output_df_pd.columns)
|
628
635
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
629
636
|
if self.sample_weight_col:
|
@@ -604,7 +604,14 @@ class TruncatedSVD(BaseTransformer):
|
|
604
604
|
) -> List[str]:
|
605
605
|
# in case the inferred output column names dimension is different
|
606
606
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
607
|
-
|
607
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
608
|
+
|
609
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
610
|
+
# seen during the fit.
|
611
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
612
|
+
sample_pd_df.columns = snowpark_column_names
|
613
|
+
|
614
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
608
615
|
output_df_columns = list(output_df_pd.columns)
|
609
616
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
610
617
|
if self.sample_weight_col:
|
@@ -623,7 +623,14 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
623
623
|
) -> List[str]:
|
624
624
|
# in case the inferred output column names dimension is different
|
625
625
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
626
|
-
|
626
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
627
|
+
|
628
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
629
|
+
# seen during the fit.
|
630
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
631
|
+
sample_pd_df.columns = snowpark_column_names
|
632
|
+
|
633
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
627
634
|
output_df_columns = list(output_df_pd.columns)
|
628
635
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
629
636
|
if self.sample_weight_col:
|
@@ -581,7 +581,14 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
581
581
|
) -> List[str]:
|
582
582
|
# in case the inferred output column names dimension is different
|
583
583
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
584
|
-
|
584
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
585
|
+
|
586
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
587
|
+
# seen during the fit.
|
588
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
589
|
+
sample_pd_df.columns = snowpark_column_names
|
590
|
+
|
591
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
585
592
|
output_df_columns = list(output_df_pd.columns)
|
586
593
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
587
594
|
if self.sample_weight_col:
|
@@ -606,7 +606,14 @@ class AdaBoostClassifier(BaseTransformer):
|
|
606
606
|
) -> List[str]:
|
607
607
|
# in case the inferred output column names dimension is different
|
608
608
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
609
|
-
|
609
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
610
|
+
|
611
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
612
|
+
# seen during the fit.
|
613
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
614
|
+
sample_pd_df.columns = snowpark_column_names
|
615
|
+
|
616
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
610
617
|
output_df_columns = list(output_df_pd.columns)
|
611
618
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
612
619
|
if self.sample_weight_col:
|
@@ -603,7 +603,14 @@ class AdaBoostRegressor(BaseTransformer):
|
|
603
603
|
) -> List[str]:
|
604
604
|
# in case the inferred output column names dimension is different
|
605
605
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
606
|
-
|
606
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
607
|
+
|
608
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
609
|
+
# seen during the fit.
|
610
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
611
|
+
sample_pd_df.columns = snowpark_column_names
|
612
|
+
|
613
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
607
614
|
output_df_columns = list(output_df_pd.columns)
|
608
615
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
609
616
|
if self.sample_weight_col:
|
@@ -638,7 +638,14 @@ class BaggingClassifier(BaseTransformer):
|
|
638
638
|
) -> List[str]:
|
639
639
|
# in case the inferred output column names dimension is different
|
640
640
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
641
|
-
|
641
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
642
|
+
|
643
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
644
|
+
# seen during the fit.
|
645
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
646
|
+
sample_pd_df.columns = snowpark_column_names
|
647
|
+
|
648
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
642
649
|
output_df_columns = list(output_df_pd.columns)
|
643
650
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
644
651
|
if self.sample_weight_col:
|
@@ -638,7 +638,14 @@ class BaggingRegressor(BaseTransformer):
|
|
638
638
|
) -> List[str]:
|
639
639
|
# in case the inferred output column names dimension is different
|
640
640
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
641
|
-
|
641
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
642
|
+
|
643
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
644
|
+
# seen during the fit.
|
645
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
646
|
+
sample_pd_df.columns = snowpark_column_names
|
647
|
+
|
648
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
642
649
|
output_df_columns = list(output_df_pd.columns)
|
643
650
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
644
651
|
if self.sample_weight_col:
|
@@ -741,7 +741,14 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
741
741
|
) -> List[str]:
|
742
742
|
# in case the inferred output column names dimension is different
|
743
743
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
744
|
-
|
744
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
745
|
+
|
746
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
747
|
+
# seen during the fit.
|
748
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
749
|
+
sample_pd_df.columns = snowpark_column_names
|
750
|
+
|
751
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
745
752
|
output_df_columns = list(output_df_pd.columns)
|
746
753
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
747
754
|
if self.sample_weight_col:
|
@@ -720,7 +720,14 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
720
720
|
) -> List[str]:
|
721
721
|
# in case the inferred output column names dimension is different
|
722
722
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
723
|
-
|
723
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
724
|
+
|
725
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
726
|
+
# seen during the fit.
|
727
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
728
|
+
sample_pd_df.columns = snowpark_column_names
|
729
|
+
|
730
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
724
731
|
output_df_columns = list(output_df_pd.columns)
|
725
732
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
726
733
|
if self.sample_weight_col:
|
@@ -753,7 +753,14 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
753
753
|
) -> List[str]:
|
754
754
|
# in case the inferred output column names dimension is different
|
755
755
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
756
|
-
|
756
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
757
|
+
|
758
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
759
|
+
# seen during the fit.
|
760
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
761
|
+
sample_pd_df.columns = snowpark_column_names
|
762
|
+
|
763
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
757
764
|
output_df_columns = list(output_df_pd.columns)
|
758
765
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
759
766
|
if self.sample_weight_col:
|
@@ -762,7 +762,14 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
762
762
|
) -> List[str]:
|
763
763
|
# in case the inferred output column names dimension is different
|
764
764
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
765
|
-
|
765
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
766
|
+
|
767
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
768
|
+
# seen during the fit.
|
769
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
770
|
+
sample_pd_df.columns = snowpark_column_names
|
771
|
+
|
772
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
766
773
|
output_df_columns = list(output_df_pd.columns)
|
767
774
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
768
775
|
if self.sample_weight_col:
|
@@ -734,7 +734,14 @@ class HistGradientBoostingClassifier(BaseTransformer):
|
|
734
734
|
) -> List[str]:
|
735
735
|
# in case the inferred output column names dimension is different
|
736
736
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
737
|
-
|
737
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
738
|
+
|
739
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
740
|
+
# seen during the fit.
|
741
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
742
|
+
sample_pd_df.columns = snowpark_column_names
|
743
|
+
|
744
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
738
745
|
output_df_columns = list(output_df_pd.columns)
|
739
746
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
740
747
|
if self.sample_weight_col:
|
@@ -725,7 +725,14 @@ class HistGradientBoostingRegressor(BaseTransformer):
|
|
725
725
|
) -> List[str]:
|
726
726
|
# in case the inferred output column names dimension is different
|
727
727
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
728
|
-
|
728
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
729
|
+
|
730
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
731
|
+
# seen during the fit.
|
732
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
733
|
+
sample_pd_df.columns = snowpark_column_names
|
734
|
+
|
735
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
729
736
|
output_df_columns = list(output_df_pd.columns)
|
730
737
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
731
738
|
if self.sample_weight_col:
|
@@ -627,7 +627,14 @@ class IsolationForest(BaseTransformer):
|
|
627
627
|
) -> List[str]:
|
628
628
|
# in case the inferred output column names dimension is different
|
629
629
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
630
|
-
|
630
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
631
|
+
|
632
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
633
|
+
# seen during the fit.
|
634
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
635
|
+
sample_pd_df.columns = snowpark_column_names
|
636
|
+
|
637
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
631
638
|
output_df_columns = list(output_df_pd.columns)
|
632
639
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
633
640
|
if self.sample_weight_col:
|
@@ -737,7 +737,14 @@ class RandomForestClassifier(BaseTransformer):
|
|
737
737
|
) -> List[str]:
|
738
738
|
# in case the inferred output column names dimension is different
|
739
739
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
740
|
-
|
740
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
741
|
+
|
742
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
743
|
+
# seen during the fit.
|
744
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
745
|
+
sample_pd_df.columns = snowpark_column_names
|
746
|
+
|
747
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
741
748
|
output_df_columns = list(output_df_pd.columns)
|
742
749
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
743
750
|
if self.sample_weight_col:
|
@@ -716,7 +716,14 @@ class RandomForestRegressor(BaseTransformer):
|
|
716
716
|
) -> List[str]:
|
717
717
|
# in case the inferred output column names dimension is different
|
718
718
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
719
|
-
|
719
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
720
|
+
|
721
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
722
|
+
# seen during the fit.
|
723
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
724
|
+
sample_pd_df.columns = snowpark_column_names
|
725
|
+
|
726
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
720
727
|
output_df_columns = list(output_df_pd.columns)
|
721
728
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
722
729
|
if self.sample_weight_col:
|