snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_sentiment.py +7 -4
- snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
- snowflake/ml/feature_store/access_manager.py +34 -30
- snowflake/ml/feature_store/feature_store.py +1 -1
- snowflake/ml/feature_store/feature_view.py +12 -11
- snowflake/ml/fileset/snowfs.py +2 -31
- snowflake/ml/model/_client/ops/model_ops.py +43 -0
- snowflake/ml/model/_client/sql/model_version.py +53 -1
- snowflake/ml/model/_model_composer/model_composer.py +6 -2
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +58 -139
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
- snowflake/ml/modeling/cluster/birch.py +8 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
- snowflake/ml/modeling/cluster/dbscan.py +8 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
- snowflake/ml/modeling/cluster/k_means.py +8 -1
- snowflake/ml/modeling/cluster/mean_shift.py +8 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
- snowflake/ml/modeling/cluster/optics.py +8 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
- snowflake/ml/modeling/compose/column_transformer.py +8 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
- snowflake/ml/modeling/covariance/oas.py +8 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
- snowflake/ml/modeling/decomposition/pca.py +8 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
- snowflake/ml/modeling/impute/knn_imputer.py +8 -1
- snowflake/ml/modeling/impute/missing_indicator.py +8 -1
- snowflake/ml/modeling/impute/simple_imputer.py +21 -2
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/lars.py +8 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/perceptron.py +8 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ridge.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
- snowflake/ml/modeling/manifold/isomap.py +8 -1
- snowflake/ml/modeling/manifold/mds.py +8 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
- snowflake/ml/modeling/manifold/tsne.py +8 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
- snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
- snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
- snowflake/ml/modeling/svm/linear_svc.py +8 -1
- snowflake/ml/modeling/svm/linear_svr.py +8 -1
- snowflake/ml/modeling/svm/nu_svc.py +8 -1
- snowflake/ml/modeling/svm/nu_svr.py +8 -1
- snowflake/ml/modeling/svm/svc.py +8 -1
- snowflake/ml/modeling/svm/svr.py +8 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
- snowflake/ml/registry/_manager/model_manager.py +59 -1
- snowflake/ml/registry/registry.py +10 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +13 -1
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +174 -172
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -590,7 +590,14 @@ class CategoricalNB(BaseTransformer):
|
|
590
590
|
) -> List[str]:
|
591
591
|
# in case the inferred output column names dimension is different
|
592
592
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
593
|
-
|
593
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
594
|
+
|
595
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
596
|
+
# seen during the fit.
|
597
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
598
|
+
sample_pd_df.columns = snowpark_column_names
|
599
|
+
|
600
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
594
601
|
output_df_columns = list(output_df_pd.columns)
|
595
602
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
596
603
|
if self.sample_weight_col:
|
@@ -584,7 +584,14 @@ class ComplementNB(BaseTransformer):
|
|
584
584
|
) -> List[str]:
|
585
585
|
# in case the inferred output column names dimension is different
|
586
586
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
587
|
-
|
587
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
588
|
+
|
589
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
590
|
+
# seen during the fit.
|
591
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
592
|
+
sample_pd_df.columns = snowpark_column_names
|
593
|
+
|
594
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
588
595
|
output_df_columns = list(output_df_pd.columns)
|
589
596
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
590
597
|
if self.sample_weight_col:
|
@@ -565,7 +565,14 @@ class GaussianNB(BaseTransformer):
|
|
565
565
|
) -> List[str]:
|
566
566
|
# in case the inferred output column names dimension is different
|
567
567
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
568
|
-
|
568
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
569
|
+
|
570
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
571
|
+
# seen during the fit.
|
572
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
573
|
+
sample_pd_df.columns = snowpark_column_names
|
574
|
+
|
575
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
569
576
|
output_df_columns = list(output_df_pd.columns)
|
570
577
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
571
578
|
if self.sample_weight_col:
|
@@ -578,7 +578,14 @@ class MultinomialNB(BaseTransformer):
|
|
578
578
|
) -> List[str]:
|
579
579
|
# in case the inferred output column names dimension is different
|
580
580
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
581
|
-
|
581
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
582
|
+
|
583
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
584
|
+
# seen during the fit.
|
585
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
586
|
+
sample_pd_df.columns = snowpark_column_names
|
587
|
+
|
588
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
582
589
|
output_df_columns = list(output_df_pd.columns)
|
583
590
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
584
591
|
if self.sample_weight_col:
|
@@ -635,7 +635,14 @@ class KNeighborsClassifier(BaseTransformer):
|
|
635
635
|
) -> List[str]:
|
636
636
|
# in case the inferred output column names dimension is different
|
637
637
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
638
|
-
|
638
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
639
|
+
|
640
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
641
|
+
# seen during the fit.
|
642
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
643
|
+
sample_pd_df.columns = snowpark_column_names
|
644
|
+
|
645
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
639
646
|
output_df_columns = list(output_df_pd.columns)
|
640
647
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
641
648
|
if self.sample_weight_col:
|
@@ -637,7 +637,14 @@ class KNeighborsRegressor(BaseTransformer):
|
|
637
637
|
) -> List[str]:
|
638
638
|
# in case the inferred output column names dimension is different
|
639
639
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
640
|
-
|
640
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
641
|
+
|
642
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
643
|
+
# seen during the fit.
|
644
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
645
|
+
sample_pd_df.columns = snowpark_column_names
|
646
|
+
|
647
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
641
648
|
output_df_columns = list(output_df_pd.columns)
|
642
649
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
643
650
|
if self.sample_weight_col:
|
@@ -612,7 +612,14 @@ class KernelDensity(BaseTransformer):
|
|
612
612
|
) -> List[str]:
|
613
613
|
# in case the inferred output column names dimension is different
|
614
614
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
615
|
-
|
615
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
616
|
+
|
617
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
618
|
+
# seen during the fit.
|
619
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
620
|
+
sample_pd_df.columns = snowpark_column_names
|
621
|
+
|
622
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
616
623
|
output_df_columns = list(output_df_pd.columns)
|
617
624
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
618
625
|
if self.sample_weight_col:
|
@@ -644,7 +644,14 @@ class LocalOutlierFactor(BaseTransformer):
|
|
644
644
|
) -> List[str]:
|
645
645
|
# in case the inferred output column names dimension is different
|
646
646
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
647
|
-
|
647
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
648
|
+
|
649
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
650
|
+
# seen during the fit.
|
651
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
652
|
+
sample_pd_df.columns = snowpark_column_names
|
653
|
+
|
654
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
648
655
|
output_df_columns = list(output_df_pd.columns)
|
649
656
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
650
657
|
if self.sample_weight_col:
|
@@ -575,7 +575,14 @@ class NearestCentroid(BaseTransformer):
|
|
575
575
|
) -> List[str]:
|
576
576
|
# in case the inferred output column names dimension is different
|
577
577
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
578
|
-
|
578
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
579
|
+
|
580
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
581
|
+
# seen during the fit.
|
582
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
583
|
+
sample_pd_df.columns = snowpark_column_names
|
584
|
+
|
585
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
579
586
|
output_df_columns = list(output_df_pd.columns)
|
580
587
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
581
588
|
if self.sample_weight_col:
|
@@ -623,7 +623,14 @@ class NearestNeighbors(BaseTransformer):
|
|
623
623
|
) -> List[str]:
|
624
624
|
# in case the inferred output column names dimension is different
|
625
625
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
626
|
-
|
626
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
627
|
+
|
628
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
629
|
+
# seen during the fit.
|
630
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
631
|
+
sample_pd_df.columns = snowpark_column_names
|
632
|
+
|
633
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
627
634
|
output_df_columns = list(output_df_pd.columns)
|
628
635
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
629
636
|
if self.sample_weight_col:
|
@@ -648,7 +648,14 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
|
|
648
648
|
) -> List[str]:
|
649
649
|
# in case the inferred output column names dimension is different
|
650
650
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
651
|
-
|
651
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
652
|
+
|
653
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
654
|
+
# seen during the fit.
|
655
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
656
|
+
sample_pd_df.columns = snowpark_column_names
|
657
|
+
|
658
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
652
659
|
output_df_columns = list(output_df_pd.columns)
|
653
660
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
654
661
|
if self.sample_weight_col:
|
@@ -647,7 +647,14 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
647
647
|
) -> List[str]:
|
648
648
|
# in case the inferred output column names dimension is different
|
649
649
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
650
|
-
|
650
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
651
|
+
|
652
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
653
|
+
# seen during the fit.
|
654
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
655
|
+
sample_pd_df.columns = snowpark_column_names
|
656
|
+
|
657
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
651
658
|
output_df_columns = list(output_df_pd.columns)
|
652
659
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
653
660
|
if self.sample_weight_col:
|
@@ -637,7 +637,14 @@ class RadiusNeighborsRegressor(BaseTransformer):
|
|
637
637
|
) -> List[str]:
|
638
638
|
# in case the inferred output column names dimension is different
|
639
639
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
640
|
-
|
640
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
641
|
+
|
642
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
643
|
+
# seen during the fit.
|
644
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
645
|
+
sample_pd_df.columns = snowpark_column_names
|
646
|
+
|
647
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
641
648
|
output_df_columns = list(output_df_pd.columns)
|
642
649
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
643
650
|
if self.sample_weight_col:
|
@@ -596,7 +596,14 @@ class BernoulliRBM(BaseTransformer):
|
|
596
596
|
) -> List[str]:
|
597
597
|
# in case the inferred output column names dimension is different
|
598
598
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
599
|
-
|
599
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
600
|
+
|
601
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
602
|
+
# seen during the fit.
|
603
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
604
|
+
sample_pd_df.columns = snowpark_column_names
|
605
|
+
|
606
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
600
607
|
output_df_columns = list(output_df_pd.columns)
|
601
608
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
602
609
|
if self.sample_weight_col:
|
@@ -749,7 +749,14 @@ class MLPClassifier(BaseTransformer):
|
|
749
749
|
) -> List[str]:
|
750
750
|
# in case the inferred output column names dimension is different
|
751
751
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
752
|
-
|
752
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
753
|
+
|
754
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
755
|
+
# seen during the fit.
|
756
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
757
|
+
sample_pd_df.columns = snowpark_column_names
|
758
|
+
|
759
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
753
760
|
output_df_columns = list(output_df_pd.columns)
|
754
761
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
755
762
|
if self.sample_weight_col:
|
@@ -745,7 +745,14 @@ class MLPRegressor(BaseTransformer):
|
|
745
745
|
) -> List[str]:
|
746
746
|
# in case the inferred output column names dimension is different
|
747
747
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
748
|
-
|
748
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
749
|
+
|
750
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
751
|
+
# seen during the fit.
|
752
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
753
|
+
sample_pd_df.columns = snowpark_column_names
|
754
|
+
|
755
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
749
756
|
output_df_columns = list(output_df_pd.columns)
|
750
757
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
751
758
|
if self.sample_weight_col:
|
@@ -586,7 +586,14 @@ class PolynomialFeatures(BaseTransformer):
|
|
586
586
|
) -> List[str]:
|
587
587
|
# in case the inferred output column names dimension is different
|
588
588
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
589
|
-
|
589
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
590
|
+
|
591
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
592
|
+
# seen during the fit.
|
593
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
594
|
+
sample_pd_df.columns = snowpark_column_names
|
595
|
+
|
596
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
590
597
|
output_df_columns = list(output_df_pd.columns)
|
591
598
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
592
599
|
if self.sample_weight_col:
|
@@ -590,7 +590,14 @@ class LabelPropagation(BaseTransformer):
|
|
590
590
|
) -> List[str]:
|
591
591
|
# in case the inferred output column names dimension is different
|
592
592
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
593
|
-
|
593
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
594
|
+
|
595
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
596
|
+
# seen during the fit.
|
597
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
598
|
+
sample_pd_df.columns = snowpark_column_names
|
599
|
+
|
600
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
594
601
|
output_df_columns = list(output_df_pd.columns)
|
595
602
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
596
603
|
if self.sample_weight_col:
|
@@ -599,7 +599,14 @@ class LabelSpreading(BaseTransformer):
|
|
599
599
|
) -> List[str]:
|
600
600
|
# in case the inferred output column names dimension is different
|
601
601
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
602
|
-
|
602
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
603
|
+
|
604
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
605
|
+
# seen during the fit.
|
606
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
607
|
+
sample_pd_df.columns = snowpark_column_names
|
608
|
+
|
609
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
603
610
|
output_df_columns = list(output_df_pd.columns)
|
604
611
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
605
612
|
if self.sample_weight_col:
|
@@ -655,7 +655,14 @@ class LinearSVC(BaseTransformer):
|
|
655
655
|
) -> List[str]:
|
656
656
|
# in case the inferred output column names dimension is different
|
657
657
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
658
|
-
|
658
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
659
|
+
|
660
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
661
|
+
# seen during the fit.
|
662
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
663
|
+
sample_pd_df.columns = snowpark_column_names
|
664
|
+
|
665
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
659
666
|
output_df_columns = list(output_df_pd.columns)
|
660
667
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
661
668
|
if self.sample_weight_col:
|
@@ -627,7 +627,14 @@ class LinearSVR(BaseTransformer):
|
|
627
627
|
) -> List[str]:
|
628
628
|
# in case the inferred output column names dimension is different
|
629
629
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
630
|
-
|
630
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
631
|
+
|
632
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
633
|
+
# seen during the fit.
|
634
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
635
|
+
sample_pd_df.columns = snowpark_column_names
|
636
|
+
|
637
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
631
638
|
output_df_columns = list(output_df_pd.columns)
|
632
639
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
633
640
|
if self.sample_weight_col:
|
@@ -661,7 +661,14 @@ class NuSVC(BaseTransformer):
|
|
661
661
|
) -> List[str]:
|
662
662
|
# in case the inferred output column names dimension is different
|
663
663
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
664
|
-
|
664
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
665
|
+
|
666
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
667
|
+
# seen during the fit.
|
668
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
669
|
+
sample_pd_df.columns = snowpark_column_names
|
670
|
+
|
671
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
665
672
|
output_df_columns = list(output_df_pd.columns)
|
666
673
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
667
674
|
if self.sample_weight_col:
|
@@ -622,7 +622,14 @@ class NuSVR(BaseTransformer):
|
|
622
622
|
) -> List[str]:
|
623
623
|
# in case the inferred output column names dimension is different
|
624
624
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
625
|
-
|
625
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
626
|
+
|
627
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
628
|
+
# seen during the fit.
|
629
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
630
|
+
sample_pd_df.columns = snowpark_column_names
|
631
|
+
|
632
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
626
633
|
output_df_columns = list(output_df_pd.columns)
|
627
634
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
628
635
|
if self.sample_weight_col:
|
snowflake/ml/modeling/svm/svc.py
CHANGED
@@ -664,7 +664,14 @@ class SVC(BaseTransformer):
|
|
664
664
|
) -> List[str]:
|
665
665
|
# in case the inferred output column names dimension is different
|
666
666
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
667
|
-
|
667
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
668
|
+
|
669
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
670
|
+
# seen during the fit.
|
671
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
672
|
+
sample_pd_df.columns = snowpark_column_names
|
673
|
+
|
674
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
668
675
|
output_df_columns = list(output_df_pd.columns)
|
669
676
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
670
677
|
if self.sample_weight_col:
|
snowflake/ml/modeling/svm/svr.py
CHANGED
@@ -625,7 +625,14 @@ class SVR(BaseTransformer):
|
|
625
625
|
) -> List[str]:
|
626
626
|
# in case the inferred output column names dimension is different
|
627
627
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
628
|
-
|
628
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
629
|
+
|
630
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
631
|
+
# seen during the fit.
|
632
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
633
|
+
sample_pd_df.columns = snowpark_column_names
|
634
|
+
|
635
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
629
636
|
output_df_columns = list(output_df_pd.columns)
|
630
637
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
631
638
|
if self.sample_weight_col:
|
@@ -692,7 +692,14 @@ class DecisionTreeClassifier(BaseTransformer):
|
|
692
692
|
) -> List[str]:
|
693
693
|
# in case the inferred output column names dimension is different
|
694
694
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
695
|
-
|
695
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
696
|
+
|
697
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
698
|
+
# seen during the fit.
|
699
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
700
|
+
sample_pd_df.columns = snowpark_column_names
|
701
|
+
|
702
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
696
703
|
output_df_columns = list(output_df_pd.columns)
|
697
704
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
698
705
|
if self.sample_weight_col:
|
@@ -674,7 +674,14 @@ class DecisionTreeRegressor(BaseTransformer):
|
|
674
674
|
) -> List[str]:
|
675
675
|
# in case the inferred output column names dimension is different
|
676
676
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
677
|
-
|
677
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
678
|
+
|
679
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
680
|
+
# seen during the fit.
|
681
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
682
|
+
sample_pd_df.columns = snowpark_column_names
|
683
|
+
|
684
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
678
685
|
output_df_columns = list(output_df_pd.columns)
|
679
686
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
680
687
|
if self.sample_weight_col:
|
@@ -684,7 +684,14 @@ class ExtraTreeClassifier(BaseTransformer):
|
|
684
684
|
) -> List[str]:
|
685
685
|
# in case the inferred output column names dimension is different
|
686
686
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
687
|
-
|
687
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
688
|
+
|
689
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
690
|
+
# seen during the fit.
|
691
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
692
|
+
sample_pd_df.columns = snowpark_column_names
|
693
|
+
|
694
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
688
695
|
output_df_columns = list(output_df_pd.columns)
|
689
696
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
690
697
|
if self.sample_weight_col:
|
@@ -666,7 +666,14 @@ class ExtraTreeRegressor(BaseTransformer):
|
|
666
666
|
) -> List[str]:
|
667
667
|
# in case the inferred output column names dimension is different
|
668
668
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
669
|
-
|
669
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
670
|
+
|
671
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
672
|
+
# seen during the fit.
|
673
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
674
|
+
sample_pd_df.columns = snowpark_column_names
|
675
|
+
|
676
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
670
677
|
output_df_columns = list(output_df_pd.columns)
|
671
678
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
672
679
|
if self.sample_weight_col:
|
@@ -784,7 +784,14 @@ class XGBClassifier(BaseTransformer):
|
|
784
784
|
) -> List[str]:
|
785
785
|
# in case the inferred output column names dimension is different
|
786
786
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
787
|
-
|
787
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
788
|
+
|
789
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
790
|
+
# seen during the fit.
|
791
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
792
|
+
sample_pd_df.columns = snowpark_column_names
|
793
|
+
|
794
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
788
795
|
output_df_columns = list(output_df_pd.columns)
|
789
796
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
790
797
|
if self.sample_weight_col:
|
@@ -783,7 +783,14 @@ class XGBRegressor(BaseTransformer):
|
|
783
783
|
) -> List[str]:
|
784
784
|
# in case the inferred output column names dimension is different
|
785
785
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
786
|
-
|
786
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
787
|
+
|
788
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
789
|
+
# seen during the fit.
|
790
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
791
|
+
sample_pd_df.columns = snowpark_column_names
|
792
|
+
|
793
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
787
794
|
output_df_columns = list(output_df_pd.columns)
|
788
795
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
789
796
|
if self.sample_weight_col:
|
@@ -788,7 +788,14 @@ class XGBRFClassifier(BaseTransformer):
|
|
788
788
|
) -> List[str]:
|
789
789
|
# in case the inferred output column names dimension is different
|
790
790
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
791
|
-
|
791
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
792
|
+
|
793
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
794
|
+
# seen during the fit.
|
795
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
796
|
+
sample_pd_df.columns = snowpark_column_names
|
797
|
+
|
798
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
792
799
|
output_df_columns = list(output_df_pd.columns)
|
793
800
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
794
801
|
if self.sample_weight_col:
|
@@ -788,7 +788,14 @@ class XGBRFRegressor(BaseTransformer):
|
|
788
788
|
) -> List[str]:
|
789
789
|
# in case the inferred output column names dimension is different
|
790
790
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
791
|
-
|
791
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
792
|
+
|
793
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
794
|
+
# seen during the fit.
|
795
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
796
|
+
sample_pd_df.columns = snowpark_column_names
|
797
|
+
|
798
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
792
799
|
output_df_columns = list(output_df_pd.columns)
|
793
800
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
794
801
|
if self.sample_weight_col:
|