snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +26 -5
- snowflake/cortex/_sentiment.py +7 -4
- snowflake/cortex/_sse_client.py +81 -0
- snowflake/cortex/_util.py +105 -8
- snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
- snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
- snowflake/ml/dataset/dataset.py +15 -12
- snowflake/ml/dataset/dataset_factory.py +3 -4
- snowflake/ml/feature_store/access_manager.py +34 -30
- snowflake/ml/feature_store/feature_store.py +3 -3
- snowflake/ml/feature_store/feature_view.py +12 -11
- snowflake/ml/fileset/snowfs.py +2 -31
- snowflake/ml/model/_client/ops/model_ops.py +43 -0
- snowflake/ml/model/_client/sql/model_version.py +55 -3
- snowflake/ml/model/_model_composer/model_composer.py +7 -3
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -1
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
- snowflake/ml/model/_signatures/builtins_handler.py +2 -1
- snowflake/ml/model/_signatures/core.py +13 -1
- snowflake/ml/model/_signatures/pandas_handler.py +2 -0
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +196 -242
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +161 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +38 -18
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +82 -134
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +21 -17
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -2
- snowflake/ml/modeling/cluster/affinity_propagation.py +9 -2
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -2
- snowflake/ml/modeling/cluster/birch.py +9 -2
- snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -2
- snowflake/ml/modeling/cluster/dbscan.py +9 -2
- snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -2
- snowflake/ml/modeling/cluster/k_means.py +9 -2
- snowflake/ml/modeling/cluster/mean_shift.py +9 -2
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -2
- snowflake/ml/modeling/cluster/optics.py +9 -2
- snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -2
- snowflake/ml/modeling/cluster/spectral_clustering.py +9 -2
- snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -2
- snowflake/ml/modeling/compose/column_transformer.py +9 -2
- snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -2
- snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -2
- snowflake/ml/modeling/covariance/empirical_covariance.py +9 -2
- snowflake/ml/modeling/covariance/graphical_lasso.py +9 -2
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -2
- snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -2
- snowflake/ml/modeling/covariance/min_cov_det.py +9 -2
- snowflake/ml/modeling/covariance/oas.py +9 -2
- snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -2
- snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -2
- snowflake/ml/modeling/decomposition/factor_analysis.py +9 -2
- snowflake/ml/modeling/decomposition/fast_ica.py +9 -2
- snowflake/ml/modeling/decomposition/incremental_pca.py +9 -2
- snowflake/ml/modeling/decomposition/kernel_pca.py +9 -2
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -2
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -2
- snowflake/ml/modeling/decomposition/pca.py +9 -2
- snowflake/ml/modeling/decomposition/sparse_pca.py +9 -2
- snowflake/ml/modeling/decomposition/truncated_svd.py +9 -2
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -2
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -2
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/isolation_forest.py +9 -2
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/voting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/voting_regressor.py +9 -2
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fdr.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fpr.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fwe.py +9 -2
- snowflake/ml/modeling/feature_selection/select_k_best.py +9 -2
- snowflake/ml/modeling/feature_selection/select_percentile.py +9 -2
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -2
- snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -2
- snowflake/ml/modeling/framework/base.py +3 -8
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -2
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -2
- snowflake/ml/modeling/impute/iterative_imputer.py +9 -2
- snowflake/ml/modeling/impute/knn_imputer.py +9 -2
- snowflake/ml/modeling/impute/missing_indicator.py +9 -2
- snowflake/ml/modeling/impute/simple_imputer.py +28 -5
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -2
- snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -2
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -2
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -2
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -2
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -2
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -2
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ard_regression.py +9 -2
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -2
- snowflake/ml/modeling/linear_model/elastic_net.py +9 -2
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -2
- snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/huber_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/lars.py +9 -2
- snowflake/ml/modeling/linear_model/lars_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -2
- snowflake/ml/modeling/linear_model/linear_regression.py +9 -2
- snowflake/ml/modeling/linear_model/logistic_regression.py +9 -2
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -2
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -2
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/perceptron.py +9 -2
- snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ridge.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_cv.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -2
- snowflake/ml/modeling/manifold/isomap.py +9 -2
- snowflake/ml/modeling/manifold/mds.py +9 -2
- snowflake/ml/modeling/manifold/spectral_embedding.py +9 -2
- snowflake/ml/modeling/manifold/tsne.py +9 -2
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -2
- snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -2
- snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -2
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -2
- snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -2
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -2
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -2
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -2
- snowflake/ml/modeling/neighbors/kernel_density.py +9 -2
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -2
- snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -2
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -2
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -2
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -2
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -2
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -2
- snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -2
- snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -2
- snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
- snowflake/ml/modeling/pipeline/pipeline.py +5 -0
- snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
- snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +10 -2
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +8 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -2
- snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -2
- snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -2
- snowflake/ml/modeling/svm/linear_svc.py +9 -2
- snowflake/ml/modeling/svm/linear_svr.py +9 -2
- snowflake/ml/modeling/svm/nu_svc.py +9 -2
- snowflake/ml/modeling/svm/nu_svr.py +9 -2
- snowflake/ml/modeling/svm/svc.py +9 -2
- snowflake/ml/modeling/svm/svr.py +9 -2
- snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -2
- snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -2
- snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -2
- snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -2
- snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -2
- snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -2
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -2
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -2
- snowflake/ml/registry/_manager/model_manager.py +59 -1
- snowflake/ml/registry/registry.py +10 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/METADATA +32 -4
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/RECORD +207 -204
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/top_level.txt +0 -0
@@ -262,7 +262,7 @@ class LGBMClassifier(BaseTransformer):
|
|
262
262
|
inspect.currentframe(), LGBMClassifier.__class__.__name__
|
263
263
|
),
|
264
264
|
api_calls=[Session.call],
|
265
|
-
custom_tags=
|
265
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
266
266
|
)
|
267
267
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
268
268
|
pd_df.columns = dataset.columns
|
@@ -595,7 +595,14 @@ class LGBMClassifier(BaseTransformer):
|
|
595
595
|
) -> List[str]:
|
596
596
|
# in case the inferred output column names dimension is different
|
597
597
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
598
|
-
|
598
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
599
|
+
|
600
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
601
|
+
# seen during the fit.
|
602
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
603
|
+
sample_pd_df.columns = snowpark_column_names
|
604
|
+
|
605
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
599
606
|
output_df_columns = list(output_df_pd.columns)
|
600
607
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
601
608
|
if self.sample_weight_col:
|
@@ -262,7 +262,7 @@ class LGBMRegressor(BaseTransformer):
|
|
262
262
|
inspect.currentframe(), LGBMRegressor.__class__.__name__
|
263
263
|
),
|
264
264
|
api_calls=[Session.call],
|
265
|
-
custom_tags=
|
265
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
266
266
|
)
|
267
267
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
268
268
|
pd_df.columns = dataset.columns
|
@@ -595,7 +595,14 @@ class LGBMRegressor(BaseTransformer):
|
|
595
595
|
) -> List[str]:
|
596
596
|
# in case the inferred output column names dimension is different
|
597
597
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
598
|
-
|
598
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
599
|
+
|
600
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
601
|
+
# seen during the fit.
|
602
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
603
|
+
sample_pd_df.columns = snowpark_column_names
|
604
|
+
|
605
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
599
606
|
output_df_columns = list(output_df_pd.columns)
|
600
607
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
601
608
|
if self.sample_weight_col:
|
@@ -287,7 +287,7 @@ class ARDRegression(BaseTransformer):
|
|
287
287
|
inspect.currentframe(), ARDRegression.__class__.__name__
|
288
288
|
),
|
289
289
|
api_calls=[Session.call],
|
290
|
-
custom_tags=
|
290
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
291
291
|
)
|
292
292
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
293
293
|
pd_df.columns = dataset.columns
|
@@ -620,7 +620,14 @@ class ARDRegression(BaseTransformer):
|
|
620
620
|
) -> List[str]:
|
621
621
|
# in case the inferred output column names dimension is different
|
622
622
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
623
|
-
|
623
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
624
|
+
|
625
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
626
|
+
# seen during the fit.
|
627
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
628
|
+
sample_pd_df.columns = snowpark_column_names
|
629
|
+
|
630
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
624
631
|
output_df_columns = list(output_df_pd.columns)
|
625
632
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
626
633
|
if self.sample_weight_col:
|
@@ -298,7 +298,7 @@ class BayesianRidge(BaseTransformer):
|
|
298
298
|
inspect.currentframe(), BayesianRidge.__class__.__name__
|
299
299
|
),
|
300
300
|
api_calls=[Session.call],
|
301
|
-
custom_tags=
|
301
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
302
302
|
)
|
303
303
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
304
304
|
pd_df.columns = dataset.columns
|
@@ -631,7 +631,14 @@ class BayesianRidge(BaseTransformer):
|
|
631
631
|
) -> List[str]:
|
632
632
|
# in case the inferred output column names dimension is different
|
633
633
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
634
|
-
|
634
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
635
|
+
|
636
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
637
|
+
# seen during the fit.
|
638
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
639
|
+
sample_pd_df.columns = snowpark_column_names
|
640
|
+
|
641
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
635
642
|
output_df_columns = list(output_df_pd.columns)
|
636
643
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
637
644
|
if self.sample_weight_col:
|
@@ -297,7 +297,7 @@ class ElasticNet(BaseTransformer):
|
|
297
297
|
inspect.currentframe(), ElasticNet.__class__.__name__
|
298
298
|
),
|
299
299
|
api_calls=[Session.call],
|
300
|
-
custom_tags=
|
300
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
301
301
|
)
|
302
302
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
303
303
|
pd_df.columns = dataset.columns
|
@@ -630,7 +630,14 @@ class ElasticNet(BaseTransformer):
|
|
630
630
|
) -> List[str]:
|
631
631
|
# in case the inferred output column names dimension is different
|
632
632
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
633
|
-
|
633
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
634
|
+
|
635
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
636
|
+
# seen during the fit.
|
637
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
638
|
+
sample_pd_df.columns = snowpark_column_names
|
639
|
+
|
640
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
634
641
|
output_df_columns = list(output_df_pd.columns)
|
635
642
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
636
643
|
if self.sample_weight_col:
|
@@ -333,7 +333,7 @@ class ElasticNetCV(BaseTransformer):
|
|
333
333
|
inspect.currentframe(), ElasticNetCV.__class__.__name__
|
334
334
|
),
|
335
335
|
api_calls=[Session.call],
|
336
|
-
custom_tags=
|
336
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
337
337
|
)
|
338
338
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
339
339
|
pd_df.columns = dataset.columns
|
@@ -666,7 +666,14 @@ class ElasticNetCV(BaseTransformer):
|
|
666
666
|
) -> List[str]:
|
667
667
|
# in case the inferred output column names dimension is different
|
668
668
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
669
|
-
|
669
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
670
|
+
|
671
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
672
|
+
# seen during the fit.
|
673
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
674
|
+
sample_pd_df.columns = snowpark_column_names
|
675
|
+
|
676
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
670
677
|
output_df_columns = list(output_df_pd.columns)
|
671
678
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
672
679
|
if self.sample_weight_col:
|
@@ -278,7 +278,7 @@ class GammaRegressor(BaseTransformer):
|
|
278
278
|
inspect.currentframe(), GammaRegressor.__class__.__name__
|
279
279
|
),
|
280
280
|
api_calls=[Session.call],
|
281
|
-
custom_tags=
|
281
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
282
282
|
)
|
283
283
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
284
284
|
pd_df.columns = dataset.columns
|
@@ -611,7 +611,14 @@ class GammaRegressor(BaseTransformer):
|
|
611
611
|
) -> List[str]:
|
612
612
|
# in case the inferred output column names dimension is different
|
613
613
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
614
|
-
|
614
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
615
|
+
|
616
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
617
|
+
# seen during the fit.
|
618
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
619
|
+
sample_pd_df.columns = snowpark_column_names
|
620
|
+
|
621
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
615
622
|
output_df_columns = list(output_df_pd.columns)
|
616
623
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
617
624
|
if self.sample_weight_col:
|
@@ -261,7 +261,7 @@ class HuberRegressor(BaseTransformer):
|
|
261
261
|
inspect.currentframe(), HuberRegressor.__class__.__name__
|
262
262
|
),
|
263
263
|
api_calls=[Session.call],
|
264
|
-
custom_tags=
|
264
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
265
265
|
)
|
266
266
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
267
267
|
pd_df.columns = dataset.columns
|
@@ -594,7 +594,14 @@ class HuberRegressor(BaseTransformer):
|
|
594
594
|
) -> List[str]:
|
595
595
|
# in case the inferred output column names dimension is different
|
596
596
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
597
|
-
|
597
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
598
|
+
|
599
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
600
|
+
# seen during the fit.
|
601
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
602
|
+
sample_pd_df.columns = snowpark_column_names
|
603
|
+
|
604
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
598
605
|
output_df_columns = list(output_df_pd.columns)
|
599
606
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
600
607
|
if self.sample_weight_col:
|
@@ -290,7 +290,7 @@ class Lars(BaseTransformer):
|
|
290
290
|
inspect.currentframe(), Lars.__class__.__name__
|
291
291
|
),
|
292
292
|
api_calls=[Session.call],
|
293
|
-
custom_tags=
|
293
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
294
294
|
)
|
295
295
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
296
296
|
pd_df.columns = dataset.columns
|
@@ -623,7 +623,14 @@ class Lars(BaseTransformer):
|
|
623
623
|
) -> List[str]:
|
624
624
|
# in case the inferred output column names dimension is different
|
625
625
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
626
|
-
|
626
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
627
|
+
|
628
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
629
|
+
# seen during the fit.
|
630
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
631
|
+
sample_pd_df.columns = snowpark_column_names
|
632
|
+
|
633
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
627
634
|
output_df_columns = list(output_df_pd.columns)
|
628
635
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
629
636
|
if self.sample_weight_col:
|
@@ -298,7 +298,7 @@ class LarsCV(BaseTransformer):
|
|
298
298
|
inspect.currentframe(), LarsCV.__class__.__name__
|
299
299
|
),
|
300
300
|
api_calls=[Session.call],
|
301
|
-
custom_tags=
|
301
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
302
302
|
)
|
303
303
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
304
304
|
pd_df.columns = dataset.columns
|
@@ -631,7 +631,14 @@ class LarsCV(BaseTransformer):
|
|
631
631
|
) -> List[str]:
|
632
632
|
# in case the inferred output column names dimension is different
|
633
633
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
634
|
-
|
634
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
635
|
+
|
636
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
637
|
+
# seen during the fit.
|
638
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
639
|
+
sample_pd_df.columns = snowpark_column_names
|
640
|
+
|
641
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
635
642
|
output_df_columns = list(output_df_pd.columns)
|
636
643
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
637
644
|
if self.sample_weight_col:
|
@@ -291,7 +291,7 @@ class Lasso(BaseTransformer):
|
|
291
291
|
inspect.currentframe(), Lasso.__class__.__name__
|
292
292
|
),
|
293
293
|
api_calls=[Session.call],
|
294
|
-
custom_tags=
|
294
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
295
295
|
)
|
296
296
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
297
297
|
pd_df.columns = dataset.columns
|
@@ -624,7 +624,14 @@ class Lasso(BaseTransformer):
|
|
624
624
|
) -> List[str]:
|
625
625
|
# in case the inferred output column names dimension is different
|
626
626
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
627
|
-
|
627
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
628
|
+
|
629
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
630
|
+
# seen during the fit.
|
631
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
632
|
+
sample_pd_df.columns = snowpark_column_names
|
633
|
+
|
634
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
628
635
|
output_df_columns = list(output_df_pd.columns)
|
629
636
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
630
637
|
if self.sample_weight_col:
|
@@ -319,7 +319,7 @@ class LassoCV(BaseTransformer):
|
|
319
319
|
inspect.currentframe(), LassoCV.__class__.__name__
|
320
320
|
),
|
321
321
|
api_calls=[Session.call],
|
322
|
-
custom_tags=
|
322
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
323
323
|
)
|
324
324
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
325
325
|
pd_df.columns = dataset.columns
|
@@ -652,7 +652,14 @@ class LassoCV(BaseTransformer):
|
|
652
652
|
) -> List[str]:
|
653
653
|
# in case the inferred output column names dimension is different
|
654
654
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
655
|
-
|
655
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
656
|
+
|
657
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
658
|
+
# seen during the fit.
|
659
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
660
|
+
sample_pd_df.columns = snowpark_column_names
|
661
|
+
|
662
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
656
663
|
output_df_columns = list(output_df_pd.columns)
|
657
664
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
658
665
|
if self.sample_weight_col:
|
@@ -311,7 +311,7 @@ class LassoLars(BaseTransformer):
|
|
311
311
|
inspect.currentframe(), LassoLars.__class__.__name__
|
312
312
|
),
|
313
313
|
api_calls=[Session.call],
|
314
|
-
custom_tags=
|
314
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
315
315
|
)
|
316
316
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
317
317
|
pd_df.columns = dataset.columns
|
@@ -644,7 +644,14 @@ class LassoLars(BaseTransformer):
|
|
644
644
|
) -> List[str]:
|
645
645
|
# in case the inferred output column names dimension is different
|
646
646
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
647
|
-
|
647
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
648
|
+
|
649
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
650
|
+
# seen during the fit.
|
651
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
652
|
+
sample_pd_df.columns = snowpark_column_names
|
653
|
+
|
654
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
648
655
|
output_df_columns = list(output_df_pd.columns)
|
649
656
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
650
657
|
if self.sample_weight_col:
|
@@ -312,7 +312,7 @@ class LassoLarsCV(BaseTransformer):
|
|
312
312
|
inspect.currentframe(), LassoLarsCV.__class__.__name__
|
313
313
|
),
|
314
314
|
api_calls=[Session.call],
|
315
|
-
custom_tags=
|
315
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
316
316
|
)
|
317
317
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
318
318
|
pd_df.columns = dataset.columns
|
@@ -645,7 +645,14 @@ class LassoLarsCV(BaseTransformer):
|
|
645
645
|
) -> List[str]:
|
646
646
|
# in case the inferred output column names dimension is different
|
647
647
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
648
|
-
|
648
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
649
|
+
|
650
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
651
|
+
# seen during the fit.
|
652
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
653
|
+
sample_pd_df.columns = snowpark_column_names
|
654
|
+
|
655
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
649
656
|
output_df_columns = list(output_df_pd.columns)
|
650
657
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
651
658
|
if self.sample_weight_col:
|
@@ -295,7 +295,7 @@ class LassoLarsIC(BaseTransformer):
|
|
295
295
|
inspect.currentframe(), LassoLarsIC.__class__.__name__
|
296
296
|
),
|
297
297
|
api_calls=[Session.call],
|
298
|
-
custom_tags=
|
298
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
299
299
|
)
|
300
300
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
301
301
|
pd_df.columns = dataset.columns
|
@@ -628,7 +628,14 @@ class LassoLarsIC(BaseTransformer):
|
|
628
628
|
) -> List[str]:
|
629
629
|
# in case the inferred output column names dimension is different
|
630
630
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
631
|
-
|
631
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
632
|
+
|
633
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
634
|
+
# seen during the fit.
|
635
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
636
|
+
sample_pd_df.columns = snowpark_column_names
|
637
|
+
|
638
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
632
639
|
output_df_columns = list(output_df_pd.columns)
|
633
640
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
634
641
|
if self.sample_weight_col:
|
@@ -248,7 +248,7 @@ class LinearRegression(BaseTransformer):
|
|
248
248
|
inspect.currentframe(), LinearRegression.__class__.__name__
|
249
249
|
),
|
250
250
|
api_calls=[Session.call],
|
251
|
-
custom_tags=
|
251
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
252
252
|
)
|
253
253
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
254
254
|
pd_df.columns = dataset.columns
|
@@ -581,7 +581,14 @@ class LinearRegression(BaseTransformer):
|
|
581
581
|
) -> List[str]:
|
582
582
|
# in case the inferred output column names dimension is different
|
583
583
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
584
|
-
|
584
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
585
|
+
|
586
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
587
|
+
# seen during the fit.
|
588
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
589
|
+
sample_pd_df.columns = snowpark_column_names
|
590
|
+
|
591
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
585
592
|
output_df_columns = list(output_df_pd.columns)
|
586
593
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
587
594
|
if self.sample_weight_col:
|
@@ -362,7 +362,7 @@ class LogisticRegression(BaseTransformer):
|
|
362
362
|
inspect.currentframe(), LogisticRegression.__class__.__name__
|
363
363
|
),
|
364
364
|
api_calls=[Session.call],
|
365
|
-
custom_tags=
|
365
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
366
366
|
)
|
367
367
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
368
368
|
pd_df.columns = dataset.columns
|
@@ -695,7 +695,14 @@ class LogisticRegression(BaseTransformer):
|
|
695
695
|
) -> List[str]:
|
696
696
|
# in case the inferred output column names dimension is different
|
697
697
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
698
|
-
|
698
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
699
|
+
|
700
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
701
|
+
# seen during the fit.
|
702
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
703
|
+
sample_pd_df.columns = snowpark_column_names
|
704
|
+
|
705
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
699
706
|
output_df_columns = list(output_df_pd.columns)
|
700
707
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
701
708
|
if self.sample_weight_col:
|
@@ -383,7 +383,7 @@ class LogisticRegressionCV(BaseTransformer):
|
|
383
383
|
inspect.currentframe(), LogisticRegressionCV.__class__.__name__
|
384
384
|
),
|
385
385
|
api_calls=[Session.call],
|
386
|
-
custom_tags=
|
386
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
387
387
|
)
|
388
388
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
389
389
|
pd_df.columns = dataset.columns
|
@@ -716,7 +716,14 @@ class LogisticRegressionCV(BaseTransformer):
|
|
716
716
|
) -> List[str]:
|
717
717
|
# in case the inferred output column names dimension is different
|
718
718
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
719
|
-
|
719
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
720
|
+
|
721
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
722
|
+
# seen during the fit.
|
723
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
724
|
+
sample_pd_df.columns = snowpark_column_names
|
725
|
+
|
726
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
720
727
|
output_df_columns = list(output_df_pd.columns)
|
721
728
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
722
729
|
if self.sample_weight_col:
|
@@ -281,7 +281,7 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
281
281
|
inspect.currentframe(), MultiTaskElasticNet.__class__.__name__
|
282
282
|
),
|
283
283
|
api_calls=[Session.call],
|
284
|
-
custom_tags=
|
284
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
285
285
|
)
|
286
286
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
287
287
|
pd_df.columns = dataset.columns
|
@@ -614,7 +614,14 @@ class MultiTaskElasticNet(BaseTransformer):
|
|
614
614
|
) -> List[str]:
|
615
615
|
# in case the inferred output column names dimension is different
|
616
616
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
617
|
-
|
617
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
618
|
+
|
619
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
620
|
+
# seen during the fit.
|
621
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
622
|
+
sample_pd_df.columns = snowpark_column_names
|
623
|
+
|
624
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
618
625
|
output_df_columns = list(output_df_pd.columns)
|
619
626
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
620
627
|
if self.sample_weight_col:
|
@@ -322,7 +322,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
322
322
|
inspect.currentframe(), MultiTaskElasticNetCV.__class__.__name__
|
323
323
|
),
|
324
324
|
api_calls=[Session.call],
|
325
|
-
custom_tags=
|
325
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
326
326
|
)
|
327
327
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
328
328
|
pd_df.columns = dataset.columns
|
@@ -655,7 +655,14 @@ class MultiTaskElasticNetCV(BaseTransformer):
|
|
655
655
|
) -> List[str]:
|
656
656
|
# in case the inferred output column names dimension is different
|
657
657
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
658
|
-
|
658
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
659
|
+
|
660
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
661
|
+
# seen during the fit.
|
662
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
663
|
+
sample_pd_df.columns = snowpark_column_names
|
664
|
+
|
665
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
659
666
|
output_df_columns = list(output_df_pd.columns)
|
660
667
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
661
668
|
if self.sample_weight_col:
|
@@ -273,7 +273,7 @@ class MultiTaskLasso(BaseTransformer):
|
|
273
273
|
inspect.currentframe(), MultiTaskLasso.__class__.__name__
|
274
274
|
),
|
275
275
|
api_calls=[Session.call],
|
276
|
-
custom_tags=
|
276
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
277
277
|
)
|
278
278
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
279
279
|
pd_df.columns = dataset.columns
|
@@ -606,7 +606,14 @@ class MultiTaskLasso(BaseTransformer):
|
|
606
606
|
) -> List[str]:
|
607
607
|
# in case the inferred output column names dimension is different
|
608
608
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
609
|
-
|
609
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
610
|
+
|
611
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
612
|
+
# seen during the fit.
|
613
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
614
|
+
sample_pd_df.columns = snowpark_column_names
|
615
|
+
|
616
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
610
617
|
output_df_columns = list(output_df_pd.columns)
|
611
618
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
612
619
|
if self.sample_weight_col:
|
@@ -308,7 +308,7 @@ class MultiTaskLassoCV(BaseTransformer):
|
|
308
308
|
inspect.currentframe(), MultiTaskLassoCV.__class__.__name__
|
309
309
|
),
|
310
310
|
api_calls=[Session.call],
|
311
|
-
custom_tags=
|
311
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
312
312
|
)
|
313
313
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
314
314
|
pd_df.columns = dataset.columns
|
@@ -641,7 +641,14 @@ class MultiTaskLassoCV(BaseTransformer):
|
|
641
641
|
) -> List[str]:
|
642
642
|
# in case the inferred output column names dimension is different
|
643
643
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
644
|
-
|
644
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
645
|
+
|
646
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
647
|
+
# seen during the fit.
|
648
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
649
|
+
sample_pd_df.columns = snowpark_column_names
|
650
|
+
|
651
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
645
652
|
output_df_columns = list(output_df_pd.columns)
|
646
653
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
647
654
|
if self.sample_weight_col:
|
@@ -256,7 +256,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
|
|
256
256
|
inspect.currentframe(), OrthogonalMatchingPursuit.__class__.__name__
|
257
257
|
),
|
258
258
|
api_calls=[Session.call],
|
259
|
-
custom_tags=
|
259
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
260
260
|
)
|
261
261
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
262
262
|
pd_df.columns = dataset.columns
|
@@ -589,7 +589,14 @@ class OrthogonalMatchingPursuit(BaseTransformer):
|
|
589
589
|
) -> List[str]:
|
590
590
|
# in case the inferred output column names dimension is different
|
591
591
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
592
|
-
|
592
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
593
|
+
|
594
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
595
|
+
# seen during the fit.
|
596
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
597
|
+
sample_pd_df.columns = snowpark_column_names
|
598
|
+
|
599
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
593
600
|
output_df_columns = list(output_df_pd.columns)
|
594
601
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
595
602
|
if self.sample_weight_col:
|
@@ -330,7 +330,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
330
330
|
inspect.currentframe(), PassiveAggressiveClassifier.__class__.__name__
|
331
331
|
),
|
332
332
|
api_calls=[Session.call],
|
333
|
-
custom_tags=
|
333
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
334
334
|
)
|
335
335
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
336
336
|
pd_df.columns = dataset.columns
|
@@ -663,7 +663,14 @@ class PassiveAggressiveClassifier(BaseTransformer):
|
|
663
663
|
) -> List[str]:
|
664
664
|
# in case the inferred output column names dimension is different
|
665
665
|
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
666
|
-
|
666
|
+
sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
|
667
|
+
|
668
|
+
# Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
|
669
|
+
# seen during the fit.
|
670
|
+
snowpark_column_names = dataset.select(self.input_cols).columns
|
671
|
+
sample_pd_df.columns = snowpark_column_names
|
672
|
+
|
673
|
+
output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
|
667
674
|
output_df_columns = list(output_df_pd.columns)
|
668
675
|
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
669
676
|
if self.sample_weight_col:
|