snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +26 -5
- snowflake/cortex/_sentiment.py +7 -4
- snowflake/cortex/_sse_client.py +81 -0
- snowflake/cortex/_util.py +105 -8
- snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
- snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
- snowflake/ml/dataset/dataset.py +15 -12
- snowflake/ml/dataset/dataset_factory.py +3 -4
- snowflake/ml/feature_store/access_manager.py +34 -30
- snowflake/ml/feature_store/feature_store.py +3 -3
- snowflake/ml/feature_store/feature_view.py +12 -11
- snowflake/ml/fileset/snowfs.py +2 -31
- snowflake/ml/model/_client/ops/model_ops.py +43 -0
- snowflake/ml/model/_client/sql/model_version.py +55 -3
- snowflake/ml/model/_model_composer/model_composer.py +7 -3
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -1
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
- snowflake/ml/model/_signatures/builtins_handler.py +2 -1
- snowflake/ml/model/_signatures/core.py +13 -1
- snowflake/ml/model/_signatures/pandas_handler.py +2 -0
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +196 -242
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +161 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +38 -18
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +82 -134
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +21 -17
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -2
- snowflake/ml/modeling/cluster/affinity_propagation.py +9 -2
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -2
- snowflake/ml/modeling/cluster/birch.py +9 -2
- snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -2
- snowflake/ml/modeling/cluster/dbscan.py +9 -2
- snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -2
- snowflake/ml/modeling/cluster/k_means.py +9 -2
- snowflake/ml/modeling/cluster/mean_shift.py +9 -2
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -2
- snowflake/ml/modeling/cluster/optics.py +9 -2
- snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -2
- snowflake/ml/modeling/cluster/spectral_clustering.py +9 -2
- snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -2
- snowflake/ml/modeling/compose/column_transformer.py +9 -2
- snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -2
- snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -2
- snowflake/ml/modeling/covariance/empirical_covariance.py +9 -2
- snowflake/ml/modeling/covariance/graphical_lasso.py +9 -2
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -2
- snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -2
- snowflake/ml/modeling/covariance/min_cov_det.py +9 -2
- snowflake/ml/modeling/covariance/oas.py +9 -2
- snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -2
- snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -2
- snowflake/ml/modeling/decomposition/factor_analysis.py +9 -2
- snowflake/ml/modeling/decomposition/fast_ica.py +9 -2
- snowflake/ml/modeling/decomposition/incremental_pca.py +9 -2
- snowflake/ml/modeling/decomposition/kernel_pca.py +9 -2
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -2
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -2
- snowflake/ml/modeling/decomposition/pca.py +9 -2
- snowflake/ml/modeling/decomposition/sparse_pca.py +9 -2
- snowflake/ml/modeling/decomposition/truncated_svd.py +9 -2
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -2
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -2
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/isolation_forest.py +9 -2
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/voting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/voting_regressor.py +9 -2
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fdr.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fpr.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fwe.py +9 -2
- snowflake/ml/modeling/feature_selection/select_k_best.py +9 -2
- snowflake/ml/modeling/feature_selection/select_percentile.py +9 -2
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -2
- snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -2
- snowflake/ml/modeling/framework/base.py +3 -8
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -2
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -2
- snowflake/ml/modeling/impute/iterative_imputer.py +9 -2
- snowflake/ml/modeling/impute/knn_imputer.py +9 -2
- snowflake/ml/modeling/impute/missing_indicator.py +9 -2
- snowflake/ml/modeling/impute/simple_imputer.py +28 -5
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -2
- snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -2
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -2
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -2
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -2
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -2
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -2
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ard_regression.py +9 -2
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -2
- snowflake/ml/modeling/linear_model/elastic_net.py +9 -2
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -2
- snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/huber_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/lars.py +9 -2
- snowflake/ml/modeling/linear_model/lars_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -2
- snowflake/ml/modeling/linear_model/linear_regression.py +9 -2
- snowflake/ml/modeling/linear_model/logistic_regression.py +9 -2
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -2
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -2
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/perceptron.py +9 -2
- snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ridge.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_cv.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -2
- snowflake/ml/modeling/manifold/isomap.py +9 -2
- snowflake/ml/modeling/manifold/mds.py +9 -2
- snowflake/ml/modeling/manifold/spectral_embedding.py +9 -2
- snowflake/ml/modeling/manifold/tsne.py +9 -2
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -2
- snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -2
- snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -2
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -2
- snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -2
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -2
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -2
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -2
- snowflake/ml/modeling/neighbors/kernel_density.py +9 -2
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -2
- snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -2
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -2
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -2
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -2
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -2
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -2
- snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -2
- snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -2
- snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
- snowflake/ml/modeling/pipeline/pipeline.py +5 -0
- snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
- snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +10 -2
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +8 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -2
- snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -2
- snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -2
- snowflake/ml/modeling/svm/linear_svc.py +9 -2
- snowflake/ml/modeling/svm/linear_svr.py +9 -2
- snowflake/ml/modeling/svm/nu_svc.py +9 -2
- snowflake/ml/modeling/svm/nu_svr.py +9 -2
- snowflake/ml/modeling/svm/svc.py +9 -2
- snowflake/ml/modeling/svm/svr.py +9 -2
- snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -2
- snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -2
- snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -2
- snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -2
- snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -2
- snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -2
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -2
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -2
- snowflake/ml/registry/_manager/model_manager.py +59 -1
- snowflake/ml/registry/registry.py +10 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/METADATA +32 -4
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/RECORD +207 -204
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/top_level.txt +0 -0
@@ -262,7 +262,7 @@ class Birch(BaseTransformer):
|
|
262
262
|
inspect.currentframe(), Birch.__class__.__name__
|
263
263
|
),
|
264
264
|
api_calls=[Session.call],
|
265
|
-
custom_tags=
|
265
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
266
266
|
)
|
267
267
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
268
268
|
pd_df.columns = dataset.columns
|
@@ -601,7 +601,14 @@ class Birch(BaseTransformer):
|
|
601
601
|
) -> List[str]:
|
602
602
|
# in case the inferred output column names dimension is different
|
603
603
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
604
|
-
|
604
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
605
|
+
|
606
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
607
|
+
# seen during the fit.
|
608
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
609
|
+
sample_pd_df.columns = snowpark_column_names
|
610
|
+
|
611
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
605
612
|
output_df_columns = list(output_df_pd.columns)
|
606
613
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
607
614
|
if self.sample_weight_col:
|
@@ -311,7 +311,7 @@ class BisectingKMeans(BaseTransformer):
|
|
311
311
|
inspect.currentframe(), BisectingKMeans.__class__.__name__
|
312
312
|
),
|
313
313
|
api_calls=[Session.call],
|
314
|
-
custom_tags=
|
314
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
315
315
|
)
|
316
316
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
317
317
|
pd_df.columns = dataset.columns
|
@@ -650,7 +650,14 @@ class BisectingKMeans(BaseTransformer):
|
|
650
650
|
) -> List[str]:
|
651
651
|
# in case the inferred output column names dimension is different
|
652
652
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
653
|
-
|
653
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
654
|
+
|
655
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
656
|
+
# seen during the fit.
|
657
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
658
|
+
sample_pd_df.columns = snowpark_column_names
|
659
|
+
|
660
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
654
661
|
output_df_columns = list(output_df_pd.columns)
|
655
662
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
656
663
|
if self.sample_weight_col:
|
@@ -279,7 +279,7 @@ class DBSCAN(BaseTransformer):
|
|
279
279
|
inspect.currentframe(), DBSCAN.__class__.__name__
|
280
280
|
),
|
281
281
|
api_calls=[Session.call],
|
282
|
-
custom_tags=
|
282
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
283
283
|
)
|
284
284
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
285
285
|
pd_df.columns = dataset.columns
|
@@ -612,7 +612,14 @@ class DBSCAN(BaseTransformer):
|
|
612
612
|
) -> List[str]:
|
613
613
|
# in case the inferred output column names dimension is different
|
614
614
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
615
|
-
|
615
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
616
|
+
|
617
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
618
|
+
# seen during the fit.
|
619
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
620
|
+
sample_pd_df.columns = snowpark_column_names
|
621
|
+
|
622
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
616
623
|
output_df_columns = list(output_df_pd.columns)
|
617
624
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
618
625
|
if self.sample_weight_col:
|
@@ -311,7 +311,7 @@ class FeatureAgglomeration(BaseTransformer):
|
|
311
311
|
inspect.currentframe(), FeatureAgglomeration.__class__.__name__
|
312
312
|
),
|
313
313
|
api_calls=[Session.call],
|
314
|
-
custom_tags=
|
314
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
315
315
|
)
|
316
316
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
317
317
|
pd_df.columns = dataset.columns
|
@@ -648,7 +648,14 @@ class FeatureAgglomeration(BaseTransformer):
|
|
648
648
|
) -> List[str]:
|
649
649
|
# in case the inferred output column names dimension is different
|
650
650
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
651
|
-
|
651
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
652
|
+
|
653
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
654
|
+
# seen during the fit.
|
655
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
656
|
+
sample_pd_df.columns = snowpark_column_names
|
657
|
+
|
658
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
652
659
|
output_df_columns = list(output_df_pd.columns)
|
653
660
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
654
661
|
if self.sample_weight_col:
|
@@ -306,7 +306,7 @@ class KMeans(BaseTransformer):
|
|
306
306
|
inspect.currentframe(), KMeans.__class__.__name__
|
307
307
|
),
|
308
308
|
api_calls=[Session.call],
|
309
|
-
custom_tags=
|
309
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
310
310
|
)
|
311
311
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
312
312
|
pd_df.columns = dataset.columns
|
@@ -645,7 +645,14 @@ class KMeans(BaseTransformer):
|
|
645
645
|
) -> List[str]:
|
646
646
|
# in case the inferred output column names dimension is different
|
647
647
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
648
|
-
|
648
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
649
|
+
|
650
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
651
|
+
# seen during the fit.
|
652
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
653
|
+
sample_pd_df.columns = snowpark_column_names
|
654
|
+
|
655
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
649
656
|
output_df_columns = list(output_df_pd.columns)
|
650
657
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
651
658
|
if self.sample_weight_col:
|
@@ -282,7 +282,7 @@ class MeanShift(BaseTransformer):
|
|
282
282
|
inspect.currentframe(), MeanShift.__class__.__name__
|
283
283
|
),
|
284
284
|
api_calls=[Session.call],
|
285
|
-
custom_tags=
|
285
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
286
286
|
)
|
287
287
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
288
288
|
pd_df.columns = dataset.columns
|
@@ -617,7 +617,14 @@ class MeanShift(BaseTransformer):
|
|
617
617
|
) -> List[str]:
|
618
618
|
# in case the inferred output column names dimension is different
|
619
619
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
620
|
-
|
620
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
621
|
+
|
622
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
623
|
+
# seen during the fit.
|
624
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
625
|
+
sample_pd_df.columns = snowpark_column_names
|
626
|
+
|
627
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
621
628
|
output_df_columns = list(output_df_pd.columns)
|
622
629
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
623
630
|
if self.sample_weight_col:
|
@@ -332,7 +332,7 @@ class MiniBatchKMeans(BaseTransformer):
|
|
332
332
|
inspect.currentframe(), MiniBatchKMeans.__class__.__name__
|
333
333
|
),
|
334
334
|
api_calls=[Session.call],
|
335
|
-
custom_tags=
|
335
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
336
336
|
)
|
337
337
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
338
338
|
pd_df.columns = dataset.columns
|
@@ -671,7 +671,14 @@ class MiniBatchKMeans(BaseTransformer):
|
|
671
671
|
) -> List[str]:
|
672
672
|
# in case the inferred output column names dimension is different
|
673
673
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
674
|
-
|
674
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
675
|
+
|
676
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
677
|
+
# seen during the fit.
|
678
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
679
|
+
sample_pd_df.columns = snowpark_column_names
|
680
|
+
|
681
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
675
682
|
output_df_columns = list(output_df_pd.columns)
|
676
683
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
677
684
|
if self.sample_weight_col:
|
@@ -352,7 +352,7 @@ class OPTICS(BaseTransformer):
|
|
352
352
|
inspect.currentframe(), OPTICS.__class__.__name__
|
353
353
|
),
|
354
354
|
api_calls=[Session.call],
|
355
|
-
custom_tags=
|
355
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
356
356
|
)
|
357
357
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
358
358
|
pd_df.columns = dataset.columns
|
@@ -685,7 +685,14 @@ class OPTICS(BaseTransformer):
|
|
685
685
|
) -> List[str]:
|
686
686
|
# in case the inferred output column names dimension is different
|
687
687
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
688
|
-
|
688
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
689
|
+
|
690
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
691
|
+
# seen during the fit.
|
692
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
693
|
+
sample_pd_df.columns = snowpark_column_names
|
694
|
+
|
695
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
689
696
|
output_df_columns = list(output_df_pd.columns)
|
690
697
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
691
698
|
if self.sample_weight_col:
|
@@ -290,7 +290,7 @@ class SpectralBiclustering(BaseTransformer):
|
|
290
290
|
inspect.currentframe(), SpectralBiclustering.__class__.__name__
|
291
291
|
),
|
292
292
|
api_calls=[Session.call],
|
293
|
-
custom_tags=
|
293
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
294
294
|
)
|
295
295
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
296
296
|
pd_df.columns = dataset.columns
|
@@ -621,7 +621,14 @@ class SpectralBiclustering(BaseTransformer):
|
|
621
621
|
) -> List[str]:
|
622
622
|
# in case the inferred output column names dimension is different
|
623
623
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
624
|
-
|
624
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
625
|
+
|
626
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
627
|
+
# seen during the fit.
|
628
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
629
|
+
sample_pd_df.columns = snowpark_column_names
|
630
|
+
|
631
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
625
632
|
output_df_columns = list(output_df_pd.columns)
|
626
633
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
627
634
|
if self.sample_weight_col:
|
@@ -348,7 +348,7 @@ class SpectralClustering(BaseTransformer):
|
|
348
348
|
inspect.currentframe(), SpectralClustering.__class__.__name__
|
349
349
|
),
|
350
350
|
api_calls=[Session.call],
|
351
|
-
custom_tags=
|
351
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
352
352
|
)
|
353
353
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
354
354
|
pd_df.columns = dataset.columns
|
@@ -681,7 +681,14 @@ class SpectralClustering(BaseTransformer):
|
|
681
681
|
) -> List[str]:
|
682
682
|
# in case the inferred output column names dimension is different
|
683
683
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
684
|
-
|
684
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
685
|
+
|
686
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
687
|
+
# seen during the fit.
|
688
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
689
|
+
sample_pd_df.columns = snowpark_column_names
|
690
|
+
|
691
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
685
692
|
output_df_columns = list(output_df_pd.columns)
|
686
693
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
687
694
|
if self.sample_weight_col:
|
@@ -269,7 +269,7 @@ class SpectralCoclustering(BaseTransformer):
|
|
269
269
|
inspect.currentframe(), SpectralCoclustering.__class__.__name__
|
270
270
|
),
|
271
271
|
api_calls=[Session.call],
|
272
|
-
custom_tags=
|
272
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
273
273
|
)
|
274
274
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
275
275
|
pd_df.columns = dataset.columns
|
@@ -600,7 +600,14 @@ class SpectralCoclustering(BaseTransformer):
|
|
600
600
|
) -> List[str]:
|
601
601
|
# in case the inferred output column names dimension is different
|
602
602
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
603
|
-
|
603
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
604
|
+
|
605
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
606
|
+
# seen during the fit.
|
607
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
608
|
+
sample_pd_df.columns = snowpark_column_names
|
609
|
+
|
610
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
604
611
|
output_df_columns = list(output_df_pd.columns)
|
605
612
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
606
613
|
if self.sample_weight_col:
|
@@ -299,7 +299,7 @@ class ColumnTransformer(BaseTransformer):
|
|
299
299
|
inspect.currentframe(), ColumnTransformer.__class__.__name__
|
300
300
|
),
|
301
301
|
api_calls=[Session.call],
|
302
|
-
custom_tags=
|
302
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
303
303
|
)
|
304
304
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
305
305
|
pd_df.columns = dataset.columns
|
@@ -634,7 +634,14 @@ class ColumnTransformer(BaseTransformer):
|
|
634
634
|
) -> List[str]:
|
635
635
|
# in case the inferred output column names dimension is different
|
636
636
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
637
|
-
|
637
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
638
|
+
|
639
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
640
|
+
# seen during the fit.
|
641
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
642
|
+
sample_pd_df.columns = snowpark_column_names
|
643
|
+
|
644
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
638
645
|
output_df_columns = list(output_df_pd.columns)
|
639
646
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
640
647
|
if self.sample_weight_col:
|
@@ -260,7 +260,7 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
260
260
|
inspect.currentframe(), TransformedTargetRegressor.__class__.__name__
|
261
261
|
),
|
262
262
|
api_calls=[Session.call],
|
263
|
-
custom_tags=
|
263
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
264
264
|
)
|
265
265
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
266
266
|
pd_df.columns = dataset.columns
|
@@ -593,7 +593,14 @@ class TransformedTargetRegressor(BaseTransformer):
|
|
593
593
|
) -> List[str]:
|
594
594
|
# in case the inferred output column names dimension is different
|
595
595
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
596
|
-
|
596
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
597
|
+
|
598
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
599
|
+
# seen during the fit.
|
600
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
601
|
+
sample_pd_df.columns = snowpark_column_names
|
602
|
+
|
603
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
597
604
|
output_df_columns = list(output_df_pd.columns)
|
598
605
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
599
606
|
if self.sample_weight_col:
|
@@ -255,7 +255,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
255
255
|
inspect.currentframe(), EllipticEnvelope.__class__.__name__
|
256
256
|
),
|
257
257
|
api_calls=[Session.call],
|
258
|
-
custom_tags=
|
258
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
259
259
|
)
|
260
260
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
261
261
|
pd_df.columns = dataset.columns
|
@@ -590,7 +590,14 @@ class EllipticEnvelope(BaseTransformer):
|
|
590
590
|
) -> List[str]:
|
591
591
|
# in case the inferred output column names dimension is different
|
592
592
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
593
|
-
|
593
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
594
|
+
|
595
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
596
|
+
# seen during the fit.
|
597
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
598
|
+
sample_pd_df.columns = snowpark_column_names
|
599
|
+
|
600
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
594
601
|
output_df_columns = list(output_df_pd.columns)
|
595
602
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
596
603
|
if self.sample_weight_col:
|
@@ -231,7 +231,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
231
231
|
inspect.currentframe(), EmpiricalCovariance.__class__.__name__
|
232
232
|
),
|
233
233
|
api_calls=[Session.call],
|
234
|
-
custom_tags=
|
234
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
235
235
|
)
|
236
236
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
237
237
|
pd_df.columns = dataset.columns
|
@@ -562,7 +562,14 @@ class EmpiricalCovariance(BaseTransformer):
|
|
562
562
|
) -> List[str]:
|
563
563
|
# in case the inferred output column names dimension is different
|
564
564
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
565
|
-
|
565
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
566
|
+
|
567
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
568
|
+
# seen during the fit.
|
569
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
570
|
+
sample_pd_df.columns = snowpark_column_names
|
571
|
+
|
572
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
566
573
|
output_df_columns = list(output_df_pd.columns)
|
567
574
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
568
575
|
if self.sample_weight_col:
|
@@ -279,7 +279,7 @@ class GraphicalLasso(BaseTransformer):
|
|
279
279
|
inspect.currentframe(), GraphicalLasso.__class__.__name__
|
280
280
|
),
|
281
281
|
api_calls=[Session.call],
|
282
|
-
custom_tags=
|
282
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
283
283
|
)
|
284
284
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
285
285
|
pd_df.columns = dataset.columns
|
@@ -610,7 +610,14 @@ class GraphicalLasso(BaseTransformer):
|
|
610
610
|
) -> List[str]:
|
611
611
|
# in case the inferred output column names dimension is different
|
612
612
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
613
|
-
|
613
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
614
|
+
|
615
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
616
|
+
# seen during the fit.
|
617
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
618
|
+
sample_pd_df.columns = snowpark_column_names
|
619
|
+
|
620
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
614
621
|
output_df_columns = list(output_df_pd.columns)
|
615
622
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
616
623
|
if self.sample_weight_col:
|
@@ -305,7 +305,7 @@ class GraphicalLassoCV(BaseTransformer):
|
|
305
305
|
inspect.currentframe(), GraphicalLassoCV.__class__.__name__
|
306
306
|
),
|
307
307
|
api_calls=[Session.call],
|
308
|
-
custom_tags=
|
308
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
309
309
|
)
|
310
310
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
311
311
|
pd_df.columns = dataset.columns
|
@@ -636,7 +636,14 @@ class GraphicalLassoCV(BaseTransformer):
|
|
636
636
|
) -> List[str]:
|
637
637
|
# in case the inferred output column names dimension is different
|
638
638
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
639
|
-
|
639
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
640
|
+
|
641
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
642
|
+
# seen during the fit.
|
643
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
644
|
+
sample_pd_df.columns = snowpark_column_names
|
645
|
+
|
646
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
640
647
|
output_df_columns = list(output_df_pd.columns)
|
641
648
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
642
649
|
if self.sample_weight_col:
|
@@ -238,7 +238,7 @@ class LedoitWolf(BaseTransformer):
|
|
238
238
|
inspect.currentframe(), LedoitWolf.__class__.__name__
|
239
239
|
),
|
240
240
|
api_calls=[Session.call],
|
241
|
-
custom_tags=
|
241
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
242
242
|
)
|
243
243
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
244
244
|
pd_df.columns = dataset.columns
|
@@ -569,7 +569,14 @@ class LedoitWolf(BaseTransformer):
|
|
569
569
|
) -> List[str]:
|
570
570
|
# in case the inferred output column names dimension is different
|
571
571
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
572
|
-
|
572
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
573
|
+
|
574
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
575
|
+
# seen during the fit.
|
576
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
577
|
+
sample_pd_df.columns = snowpark_column_names
|
578
|
+
|
579
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
573
580
|
output_df_columns = list(output_df_pd.columns)
|
574
581
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
575
582
|
if self.sample_weight_col:
|
@@ -250,7 +250,7 @@ class MinCovDet(BaseTransformer):
|
|
250
250
|
inspect.currentframe(), MinCovDet.__class__.__name__
|
251
251
|
),
|
252
252
|
api_calls=[Session.call],
|
253
|
-
custom_tags=
|
253
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
254
254
|
)
|
255
255
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
256
256
|
pd_df.columns = dataset.columns
|
@@ -581,7 +581,14 @@ class MinCovDet(BaseTransformer):
|
|
581
581
|
) -> List[str]:
|
582
582
|
# in case the inferred output column names dimension is different
|
583
583
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
584
|
-
|
584
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
585
|
+
|
586
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
587
|
+
# seen during the fit.
|
588
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
589
|
+
sample_pd_df.columns = snowpark_column_names
|
590
|
+
|
591
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
585
592
|
output_df_columns = list(output_df_pd.columns)
|
586
593
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
587
594
|
if self.sample_weight_col:
|
@@ -231,7 +231,7 @@ class OAS(BaseTransformer):
|
|
231
231
|
inspect.currentframe(), OAS.__class__.__name__
|
232
232
|
),
|
233
233
|
api_calls=[Session.call],
|
234
|
-
custom_tags=
|
234
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
235
235
|
)
|
236
236
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
237
237
|
pd_df.columns = dataset.columns
|
@@ -562,7 +562,14 @@ class OAS(BaseTransformer):
|
|
562
562
|
) -> List[str]:
|
563
563
|
# in case the inferred output column names dimension is different
|
564
564
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
565
|
-
|
565
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
566
|
+
|
567
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
568
|
+
# seen during the fit.
|
569
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
570
|
+
sample_pd_df.columns = snowpark_column_names
|
571
|
+
|
572
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
566
573
|
output_df_columns = list(output_df_pd.columns)
|
567
574
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
568
575
|
if self.sample_weight_col:
|
@@ -237,7 +237,7 @@ class ShrunkCovariance(BaseTransformer):
|
|
237
237
|
inspect.currentframe(), ShrunkCovariance.__class__.__name__
|
238
238
|
),
|
239
239
|
api_calls=[Session.call],
|
240
|
-
custom_tags=
|
240
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
241
241
|
)
|
242
242
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
243
243
|
pd_df.columns = dataset.columns
|
@@ -568,7 +568,14 @@ class ShrunkCovariance(BaseTransformer):
|
|
568
568
|
) -> List[str]:
|
569
569
|
# in case the inferred output column names dimension is different
|
570
570
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
571
|
-
|
571
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
572
|
+
|
573
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
574
|
+
# seen during the fit.
|
575
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
576
|
+
sample_pd_df.columns = snowpark_column_names
|
577
|
+
|
578
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
572
579
|
output_df_columns = list(output_df_pd.columns)
|
573
580
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
574
581
|
if self.sample_weight_col:
|
@@ -343,7 +343,7 @@ class DictionaryLearning(BaseTransformer):
|
|
343
343
|
inspect.currentframe(), DictionaryLearning.__class__.__name__
|
344
344
|
),
|
345
345
|
api_calls=[Session.call],
|
346
|
-
custom_tags=
|
346
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
347
347
|
)
|
348
348
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
349
349
|
pd_df.columns = dataset.columns
|
@@ -678,7 +678,14 @@ class DictionaryLearning(BaseTransformer):
|
|
678
678
|
) -> List[str]:
|
679
679
|
# in case the inferred output column names dimension is different
|
680
680
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
681
|
-
|
681
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
682
|
+
|
683
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
684
|
+
# seen during the fit.
|
685
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
686
|
+
sample_pd_df.columns = snowpark_column_names
|
687
|
+
|
688
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
682
689
|
output_df_columns = list(output_df_pd.columns)
|
683
690
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
684
691
|
if self.sample_weight_col:
|
@@ -280,7 +280,7 @@ class FactorAnalysis(BaseTransformer):
|
|
280
280
|
inspect.currentframe(), FactorAnalysis.__class__.__name__
|
281
281
|
),
|
282
282
|
api_calls=[Session.call],
|
283
|
-
custom_tags=
|
283
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
284
284
|
)
|
285
285
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
286
286
|
pd_df.columns = dataset.columns
|
@@ -615,7 +615,14 @@ class FactorAnalysis(BaseTransformer):
|
|
615
615
|
) -> List[str]:
|
616
616
|
# in case the inferred output column names dimension is different
|
617
617
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
618
|
-
|
618
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
619
|
+
|
620
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
621
|
+
# seen during the fit.
|
622
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
623
|
+
sample_pd_df.columns = snowpark_column_names
|
624
|
+
|
625
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
619
626
|
output_df_columns = list(output_df_pd.columns)
|
620
627
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
621
628
|
if self.sample_weight_col:
|
@@ -298,7 +298,7 @@ class FastICA(BaseTransformer):
|
|
298
298
|
inspect.currentframe(), FastICA.__class__.__name__
|
299
299
|
),
|
300
300
|
api_calls=[Session.call],
|
301
|
-
custom_tags=
|
301
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
302
302
|
)
|
303
303
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
304
304
|
pd_df.columns = dataset.columns
|
@@ -633,7 +633,14 @@ class FastICA(BaseTransformer):
|
|
633
633
|
) -> List[str]:
|
634
634
|
# in case the inferred output column names dimension is different
|
635
635
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
636
|
-
|
636
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
637
|
+
|
638
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
639
|
+
# seen during the fit.
|
640
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
641
|
+
sample_pd_df.columns = snowpark_column_names
|
642
|
+
|
643
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
637
644
|
output_df_columns = list(output_df_pd.columns)
|
638
645
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
639
646
|
if self.sample_weight_col:
|