snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +26 -5
- snowflake/cortex/_sentiment.py +7 -4
- snowflake/cortex/_sse_client.py +81 -0
- snowflake/cortex/_util.py +105 -8
- snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
- snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
- snowflake/ml/dataset/dataset.py +15 -12
- snowflake/ml/dataset/dataset_factory.py +3 -4
- snowflake/ml/feature_store/access_manager.py +34 -30
- snowflake/ml/feature_store/feature_store.py +3 -3
- snowflake/ml/feature_store/feature_view.py +12 -11
- snowflake/ml/fileset/snowfs.py +2 -31
- snowflake/ml/model/_client/ops/model_ops.py +43 -0
- snowflake/ml/model/_client/sql/model_version.py +55 -3
- snowflake/ml/model/_model_composer/model_composer.py +7 -3
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -1
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
- snowflake/ml/model/_signatures/builtins_handler.py +2 -1
- snowflake/ml/model/_signatures/core.py +13 -1
- snowflake/ml/model/_signatures/pandas_handler.py +2 -0
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +196 -242
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +161 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +38 -18
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +82 -134
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +21 -17
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -2
- snowflake/ml/modeling/cluster/affinity_propagation.py +9 -2
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -2
- snowflake/ml/modeling/cluster/birch.py +9 -2
- snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -2
- snowflake/ml/modeling/cluster/dbscan.py +9 -2
- snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -2
- snowflake/ml/modeling/cluster/k_means.py +9 -2
- snowflake/ml/modeling/cluster/mean_shift.py +9 -2
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -2
- snowflake/ml/modeling/cluster/optics.py +9 -2
- snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -2
- snowflake/ml/modeling/cluster/spectral_clustering.py +9 -2
- snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -2
- snowflake/ml/modeling/compose/column_transformer.py +9 -2
- snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -2
- snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -2
- snowflake/ml/modeling/covariance/empirical_covariance.py +9 -2
- snowflake/ml/modeling/covariance/graphical_lasso.py +9 -2
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -2
- snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -2
- snowflake/ml/modeling/covariance/min_cov_det.py +9 -2
- snowflake/ml/modeling/covariance/oas.py +9 -2
- snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -2
- snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -2
- snowflake/ml/modeling/decomposition/factor_analysis.py +9 -2
- snowflake/ml/modeling/decomposition/fast_ica.py +9 -2
- snowflake/ml/modeling/decomposition/incremental_pca.py +9 -2
- snowflake/ml/modeling/decomposition/kernel_pca.py +9 -2
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -2
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -2
- snowflake/ml/modeling/decomposition/pca.py +9 -2
- snowflake/ml/modeling/decomposition/sparse_pca.py +9 -2
- snowflake/ml/modeling/decomposition/truncated_svd.py +9 -2
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -2
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -2
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/isolation_forest.py +9 -2
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/voting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/voting_regressor.py +9 -2
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fdr.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fpr.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fwe.py +9 -2
- snowflake/ml/modeling/feature_selection/select_k_best.py +9 -2
- snowflake/ml/modeling/feature_selection/select_percentile.py +9 -2
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -2
- snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -2
- snowflake/ml/modeling/framework/base.py +3 -8
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -2
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -2
- snowflake/ml/modeling/impute/iterative_imputer.py +9 -2
- snowflake/ml/modeling/impute/knn_imputer.py +9 -2
- snowflake/ml/modeling/impute/missing_indicator.py +9 -2
- snowflake/ml/modeling/impute/simple_imputer.py +28 -5
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -2
- snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -2
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -2
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -2
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -2
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -2
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -2
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ard_regression.py +9 -2
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -2
- snowflake/ml/modeling/linear_model/elastic_net.py +9 -2
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -2
- snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/huber_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/lars.py +9 -2
- snowflake/ml/modeling/linear_model/lars_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -2
- snowflake/ml/modeling/linear_model/linear_regression.py +9 -2
- snowflake/ml/modeling/linear_model/logistic_regression.py +9 -2
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -2
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -2
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/perceptron.py +9 -2
- snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ridge.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_cv.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -2
- snowflake/ml/modeling/manifold/isomap.py +9 -2
- snowflake/ml/modeling/manifold/mds.py +9 -2
- snowflake/ml/modeling/manifold/spectral_embedding.py +9 -2
- snowflake/ml/modeling/manifold/tsne.py +9 -2
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -2
- snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -2
- snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -2
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -2
- snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -2
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -2
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -2
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -2
- snowflake/ml/modeling/neighbors/kernel_density.py +9 -2
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -2
- snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -2
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -2
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -2
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -2
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -2
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -2
- snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -2
- snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -2
- snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
- snowflake/ml/modeling/pipeline/pipeline.py +5 -0
- snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
- snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +10 -2
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +8 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -2
- snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -2
- snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -2
- snowflake/ml/modeling/svm/linear_svc.py +9 -2
- snowflake/ml/modeling/svm/linear_svr.py +9 -2
- snowflake/ml/modeling/svm/nu_svc.py +9 -2
- snowflake/ml/modeling/svm/nu_svr.py +9 -2
- snowflake/ml/modeling/svm/svc.py +9 -2
- snowflake/ml/modeling/svm/svr.py +9 -2
- snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -2
- snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -2
- snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -2
- snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -2
- snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -2
- snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -2
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -2
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -2
- snowflake/ml/registry/_manager/model_manager.py +59 -1
- snowflake/ml/registry/registry.py +10 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/METADATA +32 -4
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/RECORD +207 -204
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/top_level.txt +0 -0
@@ -316,7 +316,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
316
316
|
inspect.currentframe(), PassiveAggressiveRegressor.__class__.__name__
|
317
317
|
),
|
318
318
|
api_calls=[Session.call],
|
319
|
-
custom_tags=
|
319
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
320
320
|
)
|
321
321
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
322
322
|
pd_df.columns = dataset.columns
|
@@ -649,7 +649,14 @@ class PassiveAggressiveRegressor(BaseTransformer):
|
|
649
649
|
) -> List[str]:
|
650
650
|
# in case the inferred output column names dimension is different
|
651
651
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
652
|
-
|
652
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
653
|
+
|
654
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
655
|
+
# seen during the fit.
|
656
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
657
|
+
sample_pd_df.columns = snowpark_column_names
|
658
|
+
|
659
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
653
660
|
output_df_columns = list(output_df_pd.columns)
|
654
661
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
655
662
|
if self.sample_weight_col:
|
@@ -329,7 +329,7 @@ class Perceptron(BaseTransformer):
|
|
329
329
|
inspect.currentframe(), Perceptron.__class__.__name__
|
330
330
|
),
|
331
331
|
api_calls=[Session.call],
|
332
|
-
custom_tags=
|
332
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
333
333
|
)
|
334
334
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
335
335
|
pd_df.columns = dataset.columns
|
@@ -662,7 +662,14 @@ class Perceptron(BaseTransformer):
|
|
662
662
|
) -> List[str]:
|
663
663
|
# in case the inferred output column names dimension is different
|
664
664
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
665
|
-
|
665
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
666
|
+
|
667
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
668
|
+
# seen during the fit.
|
669
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
670
|
+
sample_pd_df.columns = snowpark_column_names
|
671
|
+
|
672
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
666
673
|
output_df_columns = list(output_df_pd.columns)
|
667
674
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
668
675
|
if self.sample_weight_col:
|
@@ -278,7 +278,7 @@ class PoissonRegressor(BaseTransformer):
|
|
278
278
|
inspect.currentframe(), PoissonRegressor.__class__.__name__
|
279
279
|
),
|
280
280
|
api_calls=[Session.call],
|
281
|
-
custom_tags=
|
281
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
282
282
|
)
|
283
283
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
284
284
|
pd_df.columns = dataset.columns
|
@@ -611,7 +611,14 @@ class PoissonRegressor(BaseTransformer):
|
|
611
611
|
) -> List[str]:
|
612
612
|
# in case the inferred output column names dimension is different
|
613
613
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
614
|
-
|
614
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
615
|
+
|
616
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
617
|
+
# seen during the fit.
|
618
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
619
|
+
sample_pd_df.columns = snowpark_column_names
|
620
|
+
|
621
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
615
622
|
output_df_columns = list(output_df_pd.columns)
|
616
623
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
617
624
|
if self.sample_weight_col:
|
@@ -334,7 +334,7 @@ class RANSACRegressor(BaseTransformer):
|
|
334
334
|
inspect.currentframe(), RANSACRegressor.__class__.__name__
|
335
335
|
),
|
336
336
|
api_calls=[Session.call],
|
337
|
-
custom_tags=
|
337
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
338
338
|
)
|
339
339
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
340
340
|
pd_df.columns = dataset.columns
|
@@ -667,7 +667,14 @@ class RANSACRegressor(BaseTransformer):
|
|
667
667
|
) -> List[str]:
|
668
668
|
# in case the inferred output column names dimension is different
|
669
669
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
670
|
-
|
670
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
671
|
+
|
672
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
673
|
+
# seen during the fit.
|
674
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
675
|
+
sample_pd_df.columns = snowpark_column_names
|
676
|
+
|
677
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
671
678
|
output_df_columns = list(output_df_pd.columns)
|
672
679
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
673
680
|
if self.sample_weight_col:
|
@@ -326,7 +326,7 @@ class Ridge(BaseTransformer):
|
|
326
326
|
inspect.currentframe(), Ridge.__class__.__name__
|
327
327
|
),
|
328
328
|
api_calls=[Session.call],
|
329
|
-
custom_tags=
|
329
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
330
330
|
)
|
331
331
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
332
332
|
pd_df.columns = dataset.columns
|
@@ -659,7 +659,14 @@ class Ridge(BaseTransformer):
|
|
659
659
|
) -> List[str]:
|
660
660
|
# in case the inferred output column names dimension is different
|
661
661
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
662
|
-
|
662
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
663
|
+
|
664
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
665
|
+
# seen during the fit.
|
666
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
667
|
+
sample_pd_df.columns = snowpark_column_names
|
668
|
+
|
669
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
663
670
|
output_df_columns = list(output_df_pd.columns)
|
664
671
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
665
672
|
if self.sample_weight_col:
|
@@ -326,7 +326,7 @@ class RidgeClassifier(BaseTransformer):
|
|
326
326
|
inspect.currentframe(), RidgeClassifier.__class__.__name__
|
327
327
|
),
|
328
328
|
api_calls=[Session.call],
|
329
|
-
custom_tags=
|
329
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
330
330
|
)
|
331
331
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
332
332
|
pd_df.columns = dataset.columns
|
@@ -659,7 +659,14 @@ class RidgeClassifier(BaseTransformer):
|
|
659
659
|
) -> List[str]:
|
660
660
|
# in case the inferred output column names dimension is different
|
661
661
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
662
|
-
|
662
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
663
|
+
|
664
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
665
|
+
# seen during the fit.
|
666
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
667
|
+
sample_pd_df.columns = snowpark_column_names
|
668
|
+
|
669
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
663
670
|
output_df_columns = list(output_df_pd.columns)
|
664
671
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
665
672
|
if self.sample_weight_col:
|
@@ -277,7 +277,7 @@ class RidgeClassifierCV(BaseTransformer):
|
|
277
277
|
inspect.currentframe(), RidgeClassifierCV.__class__.__name__
|
278
278
|
),
|
279
279
|
api_calls=[Session.call],
|
280
|
-
custom_tags=
|
280
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
281
281
|
)
|
282
282
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
283
283
|
pd_df.columns = dataset.columns
|
@@ -610,7 +610,14 @@ class RidgeClassifierCV(BaseTransformer):
|
|
610
610
|
) -> List[str]:
|
611
611
|
# in case the inferred output column names dimension is different
|
612
612
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
613
|
-
|
613
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
614
|
+
|
615
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
616
|
+
# seen during the fit.
|
617
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
618
|
+
sample_pd_df.columns = snowpark_column_names
|
619
|
+
|
620
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
614
621
|
output_df_columns = list(output_df_pd.columns)
|
615
622
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
616
623
|
if self.sample_weight_col:
|
@@ -298,7 +298,7 @@ class RidgeCV(BaseTransformer):
|
|
298
298
|
inspect.currentframe(), RidgeCV.__class__.__name__
|
299
299
|
),
|
300
300
|
api_calls=[Session.call],
|
301
|
-
custom_tags=
|
301
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
302
302
|
)
|
303
303
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
304
304
|
pd_df.columns = dataset.columns
|
@@ -631,7 +631,14 @@ class RidgeCV(BaseTransformer):
|
|
631
631
|
) -> List[str]:
|
632
632
|
# in case the inferred output column names dimension is different
|
633
633
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
634
|
-
|
634
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
635
|
+
|
636
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
637
|
+
# seen during the fit.
|
638
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
639
|
+
sample_pd_df.columns = snowpark_column_names
|
640
|
+
|
641
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
635
642
|
output_df_columns = list(output_df_pd.columns)
|
636
643
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
637
644
|
if self.sample_weight_col:
|
@@ -417,7 +417,7 @@ class SGDClassifier(BaseTransformer):
|
|
417
417
|
inspect.currentframe(), SGDClassifier.__class__.__name__
|
418
418
|
),
|
419
419
|
api_calls=[Session.call],
|
420
|
-
custom_tags=
|
420
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
421
421
|
)
|
422
422
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
423
423
|
pd_df.columns = dataset.columns
|
@@ -750,7 +750,14 @@ class SGDClassifier(BaseTransformer):
|
|
750
750
|
) -> List[str]:
|
751
751
|
# in case the inferred output column names dimension is different
|
752
752
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
753
|
-
|
753
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
754
|
+
|
755
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
756
|
+
# seen during the fit.
|
757
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
758
|
+
sample_pd_df.columns = snowpark_column_names
|
759
|
+
|
760
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
754
761
|
output_df_columns = list(output_df_pd.columns)
|
755
762
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
756
763
|
if self.sample_weight_col:
|
@@ -315,7 +315,7 @@ class SGDOneClassSVM(BaseTransformer):
|
|
315
315
|
inspect.currentframe(), SGDOneClassSVM.__class__.__name__
|
316
316
|
),
|
317
317
|
api_calls=[Session.call],
|
318
|
-
custom_tags=
|
318
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
319
319
|
)
|
320
320
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
321
321
|
pd_df.columns = dataset.columns
|
@@ -650,7 +650,14 @@ class SGDOneClassSVM(BaseTransformer):
|
|
650
650
|
) -> List[str]:
|
651
651
|
# in case the inferred output column names dimension is different
|
652
652
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
653
|
-
|
653
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
654
|
+
|
655
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
656
|
+
# seen during the fit.
|
657
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
658
|
+
sample_pd_df.columns = snowpark_column_names
|
659
|
+
|
660
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
654
661
|
output_df_columns = list(output_df_pd.columns)
|
655
662
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
656
663
|
if self.sample_weight_col:
|
@@ -383,7 +383,7 @@ class SGDRegressor(BaseTransformer):
|
|
383
383
|
inspect.currentframe(), SGDRegressor.__class__.__name__
|
384
384
|
),
|
385
385
|
api_calls=[Session.call],
|
386
|
-
custom_tags=
|
386
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
387
387
|
)
|
388
388
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
389
389
|
pd_df.columns = dataset.columns
|
@@ -716,7 +716,14 @@ class SGDRegressor(BaseTransformer):
|
|
716
716
|
) -> List[str]:
|
717
717
|
# in case the inferred output column names dimension is different
|
718
718
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
719
|
-
|
719
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
720
|
+
|
721
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
722
|
+
# seen during the fit.
|
723
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
724
|
+
sample_pd_df.columns = snowpark_column_names
|
725
|
+
|
726
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
720
727
|
output_df_columns = list(output_df_pd.columns)
|
721
728
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
722
729
|
if self.sample_weight_col:
|
@@ -285,7 +285,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
285
285
|
inspect.currentframe(), TheilSenRegressor.__class__.__name__
|
286
286
|
),
|
287
287
|
api_calls=[Session.call],
|
288
|
-
custom_tags=
|
288
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
289
289
|
)
|
290
290
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
291
291
|
pd_df.columns = dataset.columns
|
@@ -618,7 +618,14 @@ class TheilSenRegressor(BaseTransformer):
|
|
618
618
|
) -> List[str]:
|
619
619
|
# in case the inferred output column names dimension is different
|
620
620
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
621
|
-
|
621
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
622
|
+
|
623
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
624
|
+
# seen during the fit.
|
625
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
626
|
+
sample_pd_df.columns = snowpark_column_names
|
627
|
+
|
628
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
622
629
|
output_df_columns = list(output_df_pd.columns)
|
623
630
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
624
631
|
if self.sample_weight_col:
|
@@ -311,7 +311,7 @@ class TweedieRegressor(BaseTransformer):
|
|
311
311
|
inspect.currentframe(), TweedieRegressor.__class__.__name__
|
312
312
|
),
|
313
313
|
api_calls=[Session.call],
|
314
|
-
custom_tags=
|
314
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
315
315
|
)
|
316
316
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
317
317
|
pd_df.columns = dataset.columns
|
@@ -644,7 +644,14 @@ class TweedieRegressor(BaseTransformer):
|
|
644
644
|
) -> List[str]:
|
645
645
|
# in case the inferred output column names dimension is different
|
646
646
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
647
|
-
|
647
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
648
|
+
|
649
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
650
|
+
# seen during the fit.
|
651
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
652
|
+
sample_pd_df.columns = snowpark_column_names
|
653
|
+
|
654
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
648
655
|
output_df_columns = list(output_df_pd.columns)
|
649
656
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
650
657
|
if self.sample_weight_col:
|
@@ -307,7 +307,7 @@ class Isomap(BaseTransformer):
|
|
307
307
|
inspect.currentframe(), Isomap.__class__.__name__
|
308
308
|
),
|
309
309
|
api_calls=[Session.call],
|
310
|
-
custom_tags=
|
310
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
311
311
|
)
|
312
312
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
313
313
|
pd_df.columns = dataset.columns
|
@@ -642,7 +642,14 @@ class Isomap(BaseTransformer):
|
|
642
642
|
) -> List[str]:
|
643
643
|
# in case the inferred output column names dimension is different
|
644
644
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
645
|
-
|
645
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
646
|
+
|
647
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
648
|
+
# seen during the fit.
|
649
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
650
|
+
sample_pd_df.columns = snowpark_column_names
|
651
|
+
|
652
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
646
653
|
output_df_columns = list(output_df_pd.columns)
|
647
654
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
648
655
|
if self.sample_weight_col:
|
@@ -290,7 +290,7 @@ class MDS(BaseTransformer):
|
|
290
290
|
inspect.currentframe(), MDS.__class__.__name__
|
291
291
|
),
|
292
292
|
api_calls=[Session.call],
|
293
|
-
custom_tags=
|
293
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
294
294
|
)
|
295
295
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
296
296
|
pd_df.columns = dataset.columns
|
@@ -623,7 +623,14 @@ class MDS(BaseTransformer):
|
|
623
623
|
) -> List[str]:
|
624
624
|
# in case the inferred output column names dimension is different
|
625
625
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
626
|
-
|
626
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
627
|
+
|
628
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
629
|
+
# seen during the fit.
|
630
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
631
|
+
sample_pd_df.columns = snowpark_column_names
|
632
|
+
|
633
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
627
634
|
output_df_columns = list(output_df_pd.columns)
|
628
635
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
629
636
|
if self.sample_weight_col:
|
@@ -292,7 +292,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
292
292
|
inspect.currentframe(), SpectralEmbedding.__class__.__name__
|
293
293
|
),
|
294
294
|
api_calls=[Session.call],
|
295
|
-
custom_tags=
|
295
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
296
296
|
)
|
297
297
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
298
298
|
pd_df.columns = dataset.columns
|
@@ -625,7 +625,14 @@ class SpectralEmbedding(BaseTransformer):
|
|
625
625
|
) -> List[str]:
|
626
626
|
# in case the inferred output column names dimension is different
|
627
627
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
628
|
-
|
628
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
629
|
+
|
630
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
631
|
+
# seen during the fit.
|
632
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
633
|
+
sample_pd_df.columns = snowpark_column_names
|
634
|
+
|
635
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
629
636
|
output_df_columns = list(output_df_pd.columns)
|
630
637
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
631
638
|
if self.sample_weight_col:
|
@@ -351,7 +351,7 @@ class TSNE(BaseTransformer):
|
|
351
351
|
inspect.currentframe(), TSNE.__class__.__name__
|
352
352
|
),
|
353
353
|
api_calls=[Session.call],
|
354
|
-
custom_tags=
|
354
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
355
355
|
)
|
356
356
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
357
357
|
pd_df.columns = dataset.columns
|
@@ -684,7 +684,14 @@ class TSNE(BaseTransformer):
|
|
684
684
|
) -> List[str]:
|
685
685
|
# in case the inferred output column names dimension is different
|
686
686
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
687
|
-
|
687
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
688
|
+
|
689
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
690
|
+
# seen during the fit.
|
691
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
692
|
+
sample_pd_df.columns = snowpark_column_names
|
693
|
+
|
694
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
688
695
|
output_df_columns = list(output_df_pd.columns)
|
689
696
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
690
697
|
if self.sample_weight_col:
|
@@ -354,7 +354,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
354
354
|
inspect.currentframe(), BayesianGaussianMixture.__class__.__name__
|
355
355
|
),
|
356
356
|
api_calls=[Session.call],
|
357
|
-
custom_tags=
|
357
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
358
358
|
)
|
359
359
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
360
360
|
pd_df.columns = dataset.columns
|
@@ -689,7 +689,14 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
689
689
|
) -> List[str]:
|
690
690
|
# in case the inferred output column names dimension is different
|
691
691
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
692
|
-
|
692
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
693
|
+
|
694
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
695
|
+
# seen during the fit.
|
696
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
697
|
+
sample_pd_df.columns = snowpark_column_names
|
698
|
+
|
699
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
693
700
|
output_df_columns = list(output_df_pd.columns)
|
694
701
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
695
702
|
if self.sample_weight_col:
|
@@ -327,7 +327,7 @@ class GaussianMixture(BaseTransformer):
|
|
327
327
|
inspect.currentframe(), GaussianMixture.__class__.__name__
|
328
328
|
),
|
329
329
|
api_calls=[Session.call],
|
330
|
-
custom_tags=
|
330
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
331
331
|
)
|
332
332
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
333
333
|
pd_df.columns = dataset.columns
|
@@ -662,7 +662,14 @@ class GaussianMixture(BaseTransformer):
|
|
662
662
|
) -> List[str]:
|
663
663
|
# in case the inferred output column names dimension is different
|
664
664
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
665
|
-
|
665
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
666
|
+
|
667
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
668
|
+
# seen during the fit.
|
669
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
670
|
+
sample_pd_df.columns = snowpark_column_names
|
671
|
+
|
672
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
666
673
|
output_df_columns = list(output_df_pd.columns)
|
667
674
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
668
675
|
if self.sample_weight_col:
|
@@ -285,11 +285,7 @@ class GridSearchCV(BaseTransformer):
|
|
285
285
|
)
|
286
286
|
return selected_cols
|
287
287
|
|
288
|
-
|
289
|
-
project=_PROJECT,
|
290
|
-
subproject=_SUBPROJECT,
|
291
|
-
)
|
292
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
|
288
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
|
293
289
|
"""Run fit with all sets of parameters
|
294
290
|
For more details on this function, see [sklearn.model_selection.GridSearchCV.fit]
|
295
291
|
(https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV.fit)
|
@@ -298,11 +298,7 @@ class RandomizedSearchCV(BaseTransformer):
|
|
298
298
|
)
|
299
299
|
return selected_cols
|
300
300
|
|
301
|
-
|
302
|
-
project=_PROJECT,
|
303
|
-
subproject=_SUBPROJECT,
|
304
|
-
)
|
305
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
|
301
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
|
306
302
|
"""Run fit with all sets of parameters
|
307
303
|
For more details on this function, see [sklearn.model_selection.RandomizedSearchCV.fit]
|
308
304
|
(https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV.fit)
|
@@ -239,7 +239,7 @@ class OneVsOneClassifier(BaseTransformer):
|
|
239
239
|
inspect.currentframe(), OneVsOneClassifier.__class__.__name__
|
240
240
|
),
|
241
241
|
api_calls=[Session.call],
|
242
|
-
custom_tags=
|
242
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
243
243
|
)
|
244
244
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
245
245
|
pd_df.columns = dataset.columns
|
@@ -572,7 +572,14 @@ class OneVsOneClassifier(BaseTransformer):
|
|
572
572
|
) -> List[str]:
|
573
573
|
# in case the inferred output column names dimension is different
|
574
574
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
575
|
-
|
575
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
576
|
+
|
577
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
578
|
+
# seen during the fit.
|
579
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
580
|
+
sample_pd_df.columns = snowpark_column_names
|
581
|
+
|
582
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
576
583
|
output_df_columns = list(output_df_pd.columns)
|
577
584
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
578
585
|
if self.sample_weight_col:
|
@@ -248,7 +248,7 @@ class OneVsRestClassifier(BaseTransformer):
|
|
248
248
|
inspect.currentframe(), OneVsRestClassifier.__class__.__name__
|
249
249
|
),
|
250
250
|
api_calls=[Session.call],
|
251
|
-
custom_tags=
|
251
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
252
252
|
)
|
253
253
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
254
254
|
pd_df.columns = dataset.columns
|
@@ -581,7 +581,14 @@ class OneVsRestClassifier(BaseTransformer):
|
|
581
581
|
) -> List[str]:
|
582
582
|
# in case the inferred output column names dimension is different
|
583
583
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
584
|
-
|
584
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
585
|
+
|
586
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
587
|
+
# seen during the fit.
|
588
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
589
|
+
sample_pd_df.columns = snowpark_column_names
|
590
|
+
|
591
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
585
592
|
output_df_columns = list(output_df_pd.columns)
|
586
593
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
587
594
|
if self.sample_weight_col:
|
@@ -251,7 +251,7 @@ class OutputCodeClassifier(BaseTransformer):
|
|
251
251
|
inspect.currentframe(), OutputCodeClassifier.__class__.__name__
|
252
252
|
),
|
253
253
|
api_calls=[Session.call],
|
254
|
-
custom_tags=
|
254
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
255
255
|
)
|
256
256
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
257
257
|
pd_df.columns = dataset.columns
|
@@ -584,7 +584,14 @@ class OutputCodeClassifier(BaseTransformer):
|
|
584
584
|
) -> List[str]:
|
585
585
|
# in case the inferred output column names dimension is different
|
586
586
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
587
|
-
|
587
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
588
|
+
|
589
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
590
|
+
# seen during the fit.
|
591
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
592
|
+
sample_pd_df.columns = snowpark_column_names
|
593
|
+
|
594
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
588
595
|
output_df_columns = list(output_df_pd.columns)
|
589
596
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
590
597
|
if self.sample_weight_col:
|
@@ -251,7 +251,7 @@ class BernoulliNB(BaseTransformer):
|
|
251
251
|
inspect.currentframe(), BernoulliNB.__class__.__name__
|
252
252
|
),
|
253
253
|
api_calls=[Session.call],
|
254
|
-
custom_tags=
|
254
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
255
255
|
)
|
256
256
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
257
257
|
pd_df.columns = dataset.columns
|
@@ -584,7 +584,14 @@ class BernoulliNB(BaseTransformer):
|
|
584
584
|
) -> List[str]:
|
585
585
|
# in case the inferred output column names dimension is different
|
586
586
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
587
|
-
|
587
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
588
|
+
|
589
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
590
|
+
# seen during the fit.
|
591
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
592
|
+
sample_pd_df.columns = snowpark_column_names
|
593
|
+
|
594
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
588
595
|
output_df_columns = list(output_df_pd.columns)
|
589
596
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
590
597
|
if self.sample_weight_col:
|