snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +26 -5
- snowflake/cortex/_sentiment.py +7 -4
- snowflake/cortex/_sse_client.py +81 -0
- snowflake/cortex/_util.py +105 -8
- snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
- snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
- snowflake/ml/dataset/dataset.py +15 -12
- snowflake/ml/dataset/dataset_factory.py +3 -4
- snowflake/ml/feature_store/access_manager.py +34 -30
- snowflake/ml/feature_store/feature_store.py +3 -3
- snowflake/ml/feature_store/feature_view.py +12 -11
- snowflake/ml/fileset/snowfs.py +2 -31
- snowflake/ml/model/_client/ops/model_ops.py +43 -0
- snowflake/ml/model/_client/sql/model_version.py +55 -3
- snowflake/ml/model/_model_composer/model_composer.py +7 -3
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -1
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
- snowflake/ml/model/_signatures/builtins_handler.py +2 -1
- snowflake/ml/model/_signatures/core.py +13 -1
- snowflake/ml/model/_signatures/pandas_handler.py +2 -0
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +196 -242
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +161 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +38 -18
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +82 -134
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +21 -17
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -2
- snowflake/ml/modeling/cluster/affinity_propagation.py +9 -2
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -2
- snowflake/ml/modeling/cluster/birch.py +9 -2
- snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -2
- snowflake/ml/modeling/cluster/dbscan.py +9 -2
- snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -2
- snowflake/ml/modeling/cluster/k_means.py +9 -2
- snowflake/ml/modeling/cluster/mean_shift.py +9 -2
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -2
- snowflake/ml/modeling/cluster/optics.py +9 -2
- snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -2
- snowflake/ml/modeling/cluster/spectral_clustering.py +9 -2
- snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -2
- snowflake/ml/modeling/compose/column_transformer.py +9 -2
- snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -2
- snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -2
- snowflake/ml/modeling/covariance/empirical_covariance.py +9 -2
- snowflake/ml/modeling/covariance/graphical_lasso.py +9 -2
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -2
- snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -2
- snowflake/ml/modeling/covariance/min_cov_det.py +9 -2
- snowflake/ml/modeling/covariance/oas.py +9 -2
- snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -2
- snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -2
- snowflake/ml/modeling/decomposition/factor_analysis.py +9 -2
- snowflake/ml/modeling/decomposition/fast_ica.py +9 -2
- snowflake/ml/modeling/decomposition/incremental_pca.py +9 -2
- snowflake/ml/modeling/decomposition/kernel_pca.py +9 -2
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -2
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -2
- snowflake/ml/modeling/decomposition/pca.py +9 -2
- snowflake/ml/modeling/decomposition/sparse_pca.py +9 -2
- snowflake/ml/modeling/decomposition/truncated_svd.py +9 -2
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -2
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -2
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/isolation_forest.py +9 -2
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/voting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/voting_regressor.py +9 -2
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fdr.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fpr.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fwe.py +9 -2
- snowflake/ml/modeling/feature_selection/select_k_best.py +9 -2
- snowflake/ml/modeling/feature_selection/select_percentile.py +9 -2
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -2
- snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -2
- snowflake/ml/modeling/framework/base.py +3 -8
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -2
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -2
- snowflake/ml/modeling/impute/iterative_imputer.py +9 -2
- snowflake/ml/modeling/impute/knn_imputer.py +9 -2
- snowflake/ml/modeling/impute/missing_indicator.py +9 -2
- snowflake/ml/modeling/impute/simple_imputer.py +28 -5
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -2
- snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -2
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -2
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -2
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -2
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -2
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -2
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ard_regression.py +9 -2
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -2
- snowflake/ml/modeling/linear_model/elastic_net.py +9 -2
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -2
- snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/huber_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/lars.py +9 -2
- snowflake/ml/modeling/linear_model/lars_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -2
- snowflake/ml/modeling/linear_model/linear_regression.py +9 -2
- snowflake/ml/modeling/linear_model/logistic_regression.py +9 -2
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -2
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -2
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/perceptron.py +9 -2
- snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ridge.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_cv.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -2
- snowflake/ml/modeling/manifold/isomap.py +9 -2
- snowflake/ml/modeling/manifold/mds.py +9 -2
- snowflake/ml/modeling/manifold/spectral_embedding.py +9 -2
- snowflake/ml/modeling/manifold/tsne.py +9 -2
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -2
- snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -2
- snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -2
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -2
- snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -2
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -2
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -2
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -2
- snowflake/ml/modeling/neighbors/kernel_density.py +9 -2
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -2
- snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -2
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -2
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -2
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -2
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -2
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -2
- snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -2
- snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -2
- snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
- snowflake/ml/modeling/pipeline/pipeline.py +5 -0
- snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
- snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +10 -2
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +8 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -2
- snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -2
- snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -2
- snowflake/ml/modeling/svm/linear_svc.py +9 -2
- snowflake/ml/modeling/svm/linear_svr.py +9 -2
- snowflake/ml/modeling/svm/nu_svc.py +9 -2
- snowflake/ml/modeling/svm/nu_svr.py +9 -2
- snowflake/ml/modeling/svm/svc.py +9 -2
- snowflake/ml/modeling/svm/svr.py +9 -2
- snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -2
- snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -2
- snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -2
- snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -2
- snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -2
- snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -2
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -2
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -2
- snowflake/ml/registry/_manager/model_manager.py +59 -1
- snowflake/ml/registry/registry.py +10 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/METADATA +32 -4
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/RECORD +207 -204
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/top_level.txt +0 -0
@@ -250,7 +250,7 @@ class IncrementalPCA(BaseTransformer):
|
|
250
250
|
inspect.currentframe(), IncrementalPCA.__class__.__name__
|
251
251
|
),
|
252
252
|
api_calls=[Session.call],
|
253
|
-
custom_tags=
|
253
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
254
254
|
)
|
255
255
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
256
256
|
pd_df.columns = dataset.columns
|
@@ -585,7 +585,14 @@ class IncrementalPCA(BaseTransformer):
|
|
585
585
|
) -> List[str]:
|
586
586
|
# in case the inferred output column names dimension is different
|
587
587
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
588
|
-
|
588
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
589
|
+
|
590
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
591
|
+
# seen during the fit.
|
592
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
593
|
+
sample_pd_df.columns = snowpark_column_names
|
594
|
+
|
595
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
589
596
|
output_df_columns = list(output_df_pd.columns)
|
590
597
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
591
598
|
if self.sample_weight_col:
|
@@ -346,7 +346,7 @@ class KernelPCA(BaseTransformer):
|
|
346
346
|
inspect.currentframe(), KernelPCA.__class__.__name__
|
347
347
|
),
|
348
348
|
api_calls=[Session.call],
|
349
|
-
custom_tags=
|
349
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
350
350
|
)
|
351
351
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
352
352
|
pd_df.columns = dataset.columns
|
@@ -681,7 +681,14 @@ class KernelPCA(BaseTransformer):
|
|
681
681
|
) -> List[str]:
|
682
682
|
# in case the inferred output column names dimension is different
|
683
683
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
684
|
-
|
684
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
685
|
+
|
686
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
687
|
+
# seen during the fit.
|
688
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
689
|
+
sample_pd_df.columns = snowpark_column_names
|
690
|
+
|
691
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
685
692
|
output_df_columns = list(output_df_pd.columns)
|
686
693
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
687
694
|
if self.sample_weight_col:
|
@@ -368,7 +368,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
368
368
|
inspect.currentframe(), MiniBatchDictionaryLearning.__class__.__name__
|
369
369
|
),
|
370
370
|
api_calls=[Session.call],
|
371
|
-
custom_tags=
|
371
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
372
372
|
)
|
373
373
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
374
374
|
pd_df.columns = dataset.columns
|
@@ -703,7 +703,14 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
703
703
|
) -> List[str]:
|
704
704
|
# in case the inferred output column names dimension is different
|
705
705
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
706
|
-
|
706
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
707
|
+
|
708
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
709
|
+
# seen during the fit.
|
710
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
711
|
+
sample_pd_df.columns = snowpark_column_names
|
712
|
+
|
713
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
707
714
|
output_df_columns = list(output_df_pd.columns)
|
708
715
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
709
716
|
if self.sample_weight_col:
|
@@ -313,7 +313,7 @@ class MiniBatchSparsePCA(BaseTransformer):
|
|
313
313
|
inspect.currentframe(), MiniBatchSparsePCA.__class__.__name__
|
314
314
|
),
|
315
315
|
api_calls=[Session.call],
|
316
|
-
custom_tags=
|
316
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
317
317
|
)
|
318
318
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
319
319
|
pd_df.columns = dataset.columns
|
@@ -648,7 +648,14 @@ class MiniBatchSparsePCA(BaseTransformer):
|
|
648
648
|
) -> List[str]:
|
649
649
|
# in case the inferred output column names dimension is different
|
650
650
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
651
|
-
|
651
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
652
|
+
|
653
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
654
|
+
# seen during the fit.
|
655
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
656
|
+
sample_pd_df.columns = snowpark_column_names
|
657
|
+
|
658
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
652
659
|
output_df_columns = list(output_df_pd.columns)
|
653
660
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
654
661
|
if self.sample_weight_col:
|
@@ -315,7 +315,7 @@ class PCA(BaseTransformer):
|
|
315
315
|
inspect.currentframe(), PCA.__class__.__name__
|
316
316
|
),
|
317
317
|
api_calls=[Session.call],
|
318
|
-
custom_tags=
|
318
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
319
319
|
)
|
320
320
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
321
321
|
pd_df.columns = dataset.columns
|
@@ -650,7 +650,14 @@ class PCA(BaseTransformer):
|
|
650
650
|
) -> List[str]:
|
651
651
|
# in case the inferred output column names dimension is different
|
652
652
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
653
|
-
|
653
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
654
|
+
|
655
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
656
|
+
# seen during the fit.
|
657
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
658
|
+
sample_pd_df.columns = snowpark_column_names
|
659
|
+
|
660
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
654
661
|
output_df_columns = list(output_df_pd.columns)
|
655
662
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
656
663
|
if self.sample_weight_col:
|
@@ -288,7 +288,7 @@ class SparsePCA(BaseTransformer):
|
|
288
288
|
inspect.currentframe(), SparsePCA.__class__.__name__
|
289
289
|
),
|
290
290
|
api_calls=[Session.call],
|
291
|
-
custom_tags=
|
291
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
292
292
|
)
|
293
293
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
294
294
|
pd_df.columns = dataset.columns
|
@@ -623,7 +623,14 @@ class SparsePCA(BaseTransformer):
|
|
623
623
|
) -> List[str]:
|
624
624
|
# in case the inferred output column names dimension is different
|
625
625
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
626
|
-
|
626
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
627
|
+
|
628
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
629
|
+
# seen during the fit.
|
630
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
631
|
+
sample_pd_df.columns = snowpark_column_names
|
632
|
+
|
633
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
627
634
|
output_df_columns = list(output_df_pd.columns)
|
628
635
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
629
636
|
if self.sample_weight_col:
|
@@ -269,7 +269,7 @@ class TruncatedSVD(BaseTransformer):
|
|
269
269
|
inspect.currentframe(), TruncatedSVD.__class__.__name__
|
270
270
|
),
|
271
271
|
api_calls=[Session.call],
|
272
|
-
custom_tags=
|
272
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
273
273
|
)
|
274
274
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
275
275
|
pd_df.columns = dataset.columns
|
@@ -604,7 +604,14 @@ class TruncatedSVD(BaseTransformer):
|
|
604
604
|
) -> List[str]:
|
605
605
|
# in case the inferred output column names dimension is different
|
606
606
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
607
|
-
|
607
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
608
|
+
|
609
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
610
|
+
# seen during the fit.
|
611
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
612
|
+
sample_pd_df.columns = snowpark_column_names
|
613
|
+
|
614
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
608
615
|
output_df_columns = list(output_df_pd.columns)
|
609
616
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
610
617
|
if self.sample_weight_col:
|
@@ -286,7 +286,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
286
286
|
inspect.currentframe(), LinearDiscriminantAnalysis.__class__.__name__
|
287
287
|
),
|
288
288
|
api_calls=[Session.call],
|
289
|
-
custom_tags=
|
289
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
290
290
|
)
|
291
291
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
292
292
|
pd_df.columns = dataset.columns
|
@@ -623,7 +623,14 @@ class LinearDiscriminantAnalysis(BaseTransformer):
|
|
623
623
|
) -> List[str]:
|
624
624
|
# in case the inferred output column names dimension is different
|
625
625
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
626
|
-
|
626
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
627
|
+
|
628
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
629
|
+
# seen during the fit.
|
630
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
631
|
+
sample_pd_df.columns = snowpark_column_names
|
632
|
+
|
633
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
627
634
|
output_df_columns = list(output_df_pd.columns)
|
628
635
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
629
636
|
if self.sample_weight_col:
|
@@ -248,7 +248,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
248
248
|
inspect.currentframe(), QuadraticDiscriminantAnalysis.__class__.__name__
|
249
249
|
),
|
250
250
|
api_calls=[Session.call],
|
251
|
-
custom_tags=
|
251
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
252
252
|
)
|
253
253
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
254
254
|
pd_df.columns = dataset.columns
|
@@ -581,7 +581,14 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
|
|
581
581
|
) -> List[str]:
|
582
582
|
# in case the inferred output column names dimension is different
|
583
583
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
584
|
-
|
584
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
585
|
+
|
586
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
587
|
+
# seen during the fit.
|
588
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
589
|
+
sample_pd_df.columns = snowpark_column_names
|
590
|
+
|
591
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
585
592
|
output_df_columns = list(output_df_pd.columns)
|
586
593
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
587
594
|
if self.sample_weight_col:
|
@@ -273,7 +273,7 @@ class AdaBoostClassifier(BaseTransformer):
|
|
273
273
|
inspect.currentframe(), AdaBoostClassifier.__class__.__name__
|
274
274
|
),
|
275
275
|
api_calls=[Session.call],
|
276
|
-
custom_tags=
|
276
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
277
277
|
)
|
278
278
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
279
279
|
pd_df.columns = dataset.columns
|
@@ -606,7 +606,14 @@ class AdaBoostClassifier(BaseTransformer):
|
|
606
606
|
) -> List[str]:
|
607
607
|
# in case the inferred output column names dimension is different
|
608
608
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
609
|
-
|
609
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
610
|
+
|
611
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
612
|
+
# seen during the fit.
|
613
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
614
|
+
sample_pd_df.columns = snowpark_column_names
|
615
|
+
|
616
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
610
617
|
output_df_columns = list(output_df_pd.columns)
|
611
618
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
612
619
|
if self.sample_weight_col:
|
@@ -270,7 +270,7 @@ class AdaBoostRegressor(BaseTransformer):
|
|
270
270
|
inspect.currentframe(), AdaBoostRegressor.__class__.__name__
|
271
271
|
),
|
272
272
|
api_calls=[Session.call],
|
273
|
-
custom_tags=
|
273
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
274
274
|
)
|
275
275
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
276
276
|
pd_df.columns = dataset.columns
|
@@ -603,7 +603,14 @@ class AdaBoostRegressor(BaseTransformer):
|
|
603
603
|
) -> List[str]:
|
604
604
|
# in case the inferred output column names dimension is different
|
605
605
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
606
|
-
|
606
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
607
|
+
|
608
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
609
|
+
# seen during the fit.
|
610
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
611
|
+
sample_pd_df.columns = snowpark_column_names
|
612
|
+
|
613
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
607
614
|
output_df_columns = list(output_df_pd.columns)
|
608
615
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
609
616
|
if self.sample_weight_col:
|
@@ -305,7 +305,7 @@ class BaggingClassifier(BaseTransformer):
|
|
305
305
|
inspect.currentframe(), BaggingClassifier.__class__.__name__
|
306
306
|
),
|
307
307
|
api_calls=[Session.call],
|
308
|
-
custom_tags=
|
308
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
309
309
|
)
|
310
310
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
311
311
|
pd_df.columns = dataset.columns
|
@@ -638,7 +638,14 @@ class BaggingClassifier(BaseTransformer):
|
|
638
638
|
) -> List[str]:
|
639
639
|
# in case the inferred output column names dimension is different
|
640
640
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
641
|
-
|
641
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
642
|
+
|
643
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
644
|
+
# seen during the fit.
|
645
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
646
|
+
sample_pd_df.columns = snowpark_column_names
|
647
|
+
|
648
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
642
649
|
output_df_columns = list(output_df_pd.columns)
|
643
650
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
644
651
|
if self.sample_weight_col:
|
@@ -305,7 +305,7 @@ class BaggingRegressor(BaseTransformer):
|
|
305
305
|
inspect.currentframe(), BaggingRegressor.__class__.__name__
|
306
306
|
),
|
307
307
|
api_calls=[Session.call],
|
308
|
-
custom_tags=
|
308
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
309
309
|
)
|
310
310
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
311
311
|
pd_df.columns = dataset.columns
|
@@ -638,7 +638,14 @@ class BaggingRegressor(BaseTransformer):
|
|
638
638
|
) -> List[str]:
|
639
639
|
# in case the inferred output column names dimension is different
|
640
640
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
641
|
-
|
641
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
642
|
+
|
643
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
644
|
+
# seen during the fit.
|
645
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
646
|
+
sample_pd_df.columns = snowpark_column_names
|
647
|
+
|
648
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
642
649
|
output_df_columns = list(output_df_pd.columns)
|
643
650
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
644
651
|
if self.sample_weight_col:
|
@@ -408,7 +408,7 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
408
408
|
inspect.currentframe(), ExtraTreesClassifier.__class__.__name__
|
409
409
|
),
|
410
410
|
api_calls=[Session.call],
|
411
|
-
custom_tags=
|
411
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
412
412
|
)
|
413
413
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
414
414
|
pd_df.columns = dataset.columns
|
@@ -741,7 +741,14 @@ class ExtraTreesClassifier(BaseTransformer):
|
|
741
741
|
) -> List[str]:
|
742
742
|
# in case the inferred output column names dimension is different
|
743
743
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
744
|
-
|
744
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
745
|
+
|
746
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
747
|
+
# seen during the fit.
|
748
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
749
|
+
sample_pd_df.columns = snowpark_column_names
|
750
|
+
|
751
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
745
752
|
output_df_columns = list(output_df_pd.columns)
|
746
753
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
747
754
|
if self.sample_weight_col:
|
@@ -387,7 +387,7 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
387
387
|
inspect.currentframe(), ExtraTreesRegressor.__class__.__name__
|
388
388
|
),
|
389
389
|
api_calls=[Session.call],
|
390
|
-
custom_tags=
|
390
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
391
391
|
)
|
392
392
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
393
393
|
pd_df.columns = dataset.columns
|
@@ -720,7 +720,14 @@ class ExtraTreesRegressor(BaseTransformer):
|
|
720
720
|
) -> List[str]:
|
721
721
|
# in case the inferred output column names dimension is different
|
722
722
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
723
|
-
|
723
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
724
|
+
|
725
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
726
|
+
# seen during the fit.
|
727
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
728
|
+
sample_pd_df.columns = snowpark_column_names
|
729
|
+
|
730
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
724
731
|
output_df_columns = list(output_df_pd.columns)
|
725
732
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
726
733
|
if self.sample_weight_col:
|
@@ -420,7 +420,7 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
420
420
|
inspect.currentframe(), GradientBoostingClassifier.__class__.__name__
|
421
421
|
),
|
422
422
|
api_calls=[Session.call],
|
423
|
-
custom_tags=
|
423
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
424
424
|
)
|
425
425
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
426
426
|
pd_df.columns = dataset.columns
|
@@ -753,7 +753,14 @@ class GradientBoostingClassifier(BaseTransformer):
|
|
753
753
|
) -> List[str]:
|
754
754
|
# in case the inferred output column names dimension is different
|
755
755
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
756
|
-
|
756
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
757
|
+
|
758
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
759
|
+
# seen during the fit.
|
760
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
761
|
+
sample_pd_df.columns = snowpark_column_names
|
762
|
+
|
763
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
757
764
|
output_df_columns = list(output_df_pd.columns)
|
758
765
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
759
766
|
if self.sample_weight_col:
|
@@ -429,7 +429,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
429
429
|
inspect.currentframe(), GradientBoostingRegressor.__class__.__name__
|
430
430
|
),
|
431
431
|
api_calls=[Session.call],
|
432
|
-
custom_tags=
|
432
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
433
433
|
)
|
434
434
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
435
435
|
pd_df.columns = dataset.columns
|
@@ -762,7 +762,14 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
762
762
|
) -> List[str]:
|
763
763
|
# in case the inferred output column names dimension is different
|
764
764
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
765
|
-
|
765
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
766
|
+
|
767
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
768
|
+
# seen during the fit.
|
769
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
770
|
+
sample_pd_df.columns = snowpark_column_names
|
771
|
+
|
772
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
766
773
|
output_df_columns = list(output_df_pd.columns)
|
767
774
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
768
775
|
if self.sample_weight_col:
|
@@ -401,7 +401,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
|
|
401
401
|
inspect.currentframe(), HistGradientBoostingClassifier.__class__.__name__
|
402
402
|
),
|
403
403
|
api_calls=[Session.call],
|
404
|
-
custom_tags=
|
404
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
405
405
|
)
|
406
406
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
407
407
|
pd_df.columns = dataset.columns
|
@@ -734,7 +734,14 @@ class HistGradientBoostingClassifier(BaseTransformer):
|
|
734
734
|
) -> List[str]:
|
735
735
|
# in case the inferred output column names dimension is different
|
736
736
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
737
|
-
|
737
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
738
|
+
|
739
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
740
|
+
# seen during the fit.
|
741
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
742
|
+
sample_pd_df.columns = snowpark_column_names
|
743
|
+
|
744
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
738
745
|
output_df_columns = list(output_df_pd.columns)
|
739
746
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
740
747
|
if self.sample_weight_col:
|
@@ -392,7 +392,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
|
|
392
392
|
inspect.currentframe(), HistGradientBoostingRegressor.__class__.__name__
|
393
393
|
),
|
394
394
|
api_calls=[Session.call],
|
395
|
-
custom_tags=
|
395
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
396
396
|
)
|
397
397
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
398
398
|
pd_df.columns = dataset.columns
|
@@ -725,7 +725,14 @@ class HistGradientBoostingRegressor(BaseTransformer):
|
|
725
725
|
) -> List[str]:
|
726
726
|
# in case the inferred output column names dimension is different
|
727
727
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
728
|
-
|
728
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
729
|
+
|
730
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
731
|
+
# seen during the fit.
|
732
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
733
|
+
sample_pd_df.columns = snowpark_column_names
|
734
|
+
|
735
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
729
736
|
output_df_columns = list(output_df_pd.columns)
|
730
737
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
731
738
|
if self.sample_weight_col:
|
@@ -292,7 +292,7 @@ class IsolationForest(BaseTransformer):
|
|
292
292
|
inspect.currentframe(), IsolationForest.__class__.__name__
|
293
293
|
),
|
294
294
|
api_calls=[Session.call],
|
295
|
-
custom_tags=
|
295
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
296
296
|
)
|
297
297
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
298
298
|
pd_df.columns = dataset.columns
|
@@ -627,7 +627,14 @@ class IsolationForest(BaseTransformer):
|
|
627
627
|
) -> List[str]:
|
628
628
|
# in case the inferred output column names dimension is different
|
629
629
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
630
|
-
|
630
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
631
|
+
|
632
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
633
|
+
# seen during the fit.
|
634
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
635
|
+
sample_pd_df.columns = snowpark_column_names
|
636
|
+
|
637
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
631
638
|
output_df_columns = list(output_df_pd.columns)
|
632
639
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
633
640
|
if self.sample_weight_col:
|
@@ -404,7 +404,7 @@ class RandomForestClassifier(BaseTransformer):
|
|
404
404
|
inspect.currentframe(), RandomForestClassifier.__class__.__name__
|
405
405
|
),
|
406
406
|
api_calls=[Session.call],
|
407
|
-
custom_tags=
|
407
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
408
408
|
)
|
409
409
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
410
410
|
pd_df.columns = dataset.columns
|
@@ -737,7 +737,14 @@ class RandomForestClassifier(BaseTransformer):
|
|
737
737
|
) -> List[str]:
|
738
738
|
# in case the inferred output column names dimension is different
|
739
739
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
740
|
-
|
740
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
741
|
+
|
742
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
743
|
+
# seen during the fit.
|
744
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
745
|
+
sample_pd_df.columns = snowpark_column_names
|
746
|
+
|
747
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
741
748
|
output_df_columns = list(output_df_pd.columns)
|
742
749
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
743
750
|
if self.sample_weight_col:
|
@@ -383,7 +383,7 @@ class RandomForestRegressor(BaseTransformer):
|
|
383
383
|
inspect.currentframe(), RandomForestRegressor.__class__.__name__
|
384
384
|
),
|
385
385
|
api_calls=[Session.call],
|
386
|
-
custom_tags=
|
386
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
387
387
|
)
|
388
388
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
389
389
|
pd_df.columns = dataset.columns
|
@@ -716,7 +716,14 @@ class RandomForestRegressor(BaseTransformer):
|
|
716
716
|
) -> List[str]:
|
717
717
|
# in case the inferred output column names dimension is different
|
718
718
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
719
|
-
|
719
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
720
|
+
|
721
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
722
|
+
# seen during the fit.
|
723
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
724
|
+
sample_pd_df.columns = snowpark_column_names
|
725
|
+
|
726
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
720
727
|
output_df_columns = list(output_df_pd.columns)
|
721
728
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
722
729
|
if self.sample_weight_col:
|
@@ -284,7 +284,7 @@ class StackingRegressor(BaseTransformer):
|
|
284
284
|
inspect.currentframe(), StackingRegressor.__class__.__name__
|
285
285
|
),
|
286
286
|
api_calls=[Session.call],
|
287
|
-
custom_tags=
|
287
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
288
288
|
)
|
289
289
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
290
290
|
pd_df.columns = dataset.columns
|
@@ -621,7 +621,14 @@ class StackingRegressor(BaseTransformer):
|
|
621
621
|
) -> List[str]:
|
622
622
|
# in case the inferred output column names dimension is different
|
623
623
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
624
|
-
|
624
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
625
|
+
|
626
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
627
|
+
# seen during the fit.
|
628
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
629
|
+
sample_pd_df.columns = snowpark_column_names
|
630
|
+
|
631
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
625
632
|
output_df_columns = list(output_df_pd.columns)
|
626
633
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
627
634
|
if self.sample_weight_col:
|
@@ -266,7 +266,7 @@ class VotingClassifier(BaseTransformer):
|
|
266
266
|
inspect.currentframe(), VotingClassifier.__class__.__name__
|
267
267
|
),
|
268
268
|
api_calls=[Session.call],
|
269
|
-
custom_tags=
|
269
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
270
270
|
)
|
271
271
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
272
272
|
pd_df.columns = dataset.columns
|
@@ -603,7 +603,14 @@ class VotingClassifier(BaseTransformer):
|
|
603
603
|
) -> List[str]:
|
604
604
|
# in case the inferred output column names dimension is different
|
605
605
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
606
|
-
|
606
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
607
|
+
|
608
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
609
|
+
# seen during the fit.
|
610
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
611
|
+
sample_pd_df.columns = snowpark_column_names
|
612
|
+
|
613
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
607
614
|
output_df_columns = list(output_df_pd.columns)
|
608
615
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
609
616
|
if self.sample_weight_col:
|