snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +26 -5
- snowflake/cortex/_sentiment.py +7 -4
- snowflake/cortex/_sse_client.py +81 -0
- snowflake/cortex/_util.py +105 -8
- snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
- snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
- snowflake/ml/dataset/dataset.py +15 -12
- snowflake/ml/dataset/dataset_factory.py +3 -4
- snowflake/ml/feature_store/access_manager.py +34 -30
- snowflake/ml/feature_store/feature_store.py +3 -3
- snowflake/ml/feature_store/feature_view.py +12 -11
- snowflake/ml/fileset/snowfs.py +2 -31
- snowflake/ml/model/_client/ops/model_ops.py +43 -0
- snowflake/ml/model/_client/sql/model_version.py +55 -3
- snowflake/ml/model/_model_composer/model_composer.py +7 -3
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -1
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
- snowflake/ml/model/_signatures/builtins_handler.py +2 -1
- snowflake/ml/model/_signatures/core.py +13 -1
- snowflake/ml/model/_signatures/pandas_handler.py +2 -0
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +196 -242
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +161 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +38 -18
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +82 -134
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +21 -17
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -2
- snowflake/ml/modeling/cluster/affinity_propagation.py +9 -2
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -2
- snowflake/ml/modeling/cluster/birch.py +9 -2
- snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -2
- snowflake/ml/modeling/cluster/dbscan.py +9 -2
- snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -2
- snowflake/ml/modeling/cluster/k_means.py +9 -2
- snowflake/ml/modeling/cluster/mean_shift.py +9 -2
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -2
- snowflake/ml/modeling/cluster/optics.py +9 -2
- snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -2
- snowflake/ml/modeling/cluster/spectral_clustering.py +9 -2
- snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -2
- snowflake/ml/modeling/compose/column_transformer.py +9 -2
- snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -2
- snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -2
- snowflake/ml/modeling/covariance/empirical_covariance.py +9 -2
- snowflake/ml/modeling/covariance/graphical_lasso.py +9 -2
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -2
- snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -2
- snowflake/ml/modeling/covariance/min_cov_det.py +9 -2
- snowflake/ml/modeling/covariance/oas.py +9 -2
- snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -2
- snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -2
- snowflake/ml/modeling/decomposition/factor_analysis.py +9 -2
- snowflake/ml/modeling/decomposition/fast_ica.py +9 -2
- snowflake/ml/modeling/decomposition/incremental_pca.py +9 -2
- snowflake/ml/modeling/decomposition/kernel_pca.py +9 -2
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -2
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -2
- snowflake/ml/modeling/decomposition/pca.py +9 -2
- snowflake/ml/modeling/decomposition/sparse_pca.py +9 -2
- snowflake/ml/modeling/decomposition/truncated_svd.py +9 -2
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -2
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -2
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/isolation_forest.py +9 -2
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -2
- snowflake/ml/modeling/ensemble/voting_classifier.py +9 -2
- snowflake/ml/modeling/ensemble/voting_regressor.py +9 -2
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fdr.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fpr.py +9 -2
- snowflake/ml/modeling/feature_selection/select_fwe.py +9 -2
- snowflake/ml/modeling/feature_selection/select_k_best.py +9 -2
- snowflake/ml/modeling/feature_selection/select_percentile.py +9 -2
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -2
- snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -2
- snowflake/ml/modeling/framework/base.py +3 -8
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -2
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -2
- snowflake/ml/modeling/impute/iterative_imputer.py +9 -2
- snowflake/ml/modeling/impute/knn_imputer.py +9 -2
- snowflake/ml/modeling/impute/missing_indicator.py +9 -2
- snowflake/ml/modeling/impute/simple_imputer.py +28 -5
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -2
- snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -2
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -2
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -2
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -2
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -2
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -2
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ard_regression.py +9 -2
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -2
- snowflake/ml/modeling/linear_model/elastic_net.py +9 -2
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -2
- snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/huber_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/lars.py +9 -2
- snowflake/ml/modeling/linear_model/lars_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -2
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -2
- snowflake/ml/modeling/linear_model/linear_regression.py +9 -2
- snowflake/ml/modeling/linear_model/logistic_regression.py +9 -2
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -2
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -2
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -2
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/perceptron.py +9 -2
- snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/ridge.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -2
- snowflake/ml/modeling/linear_model/ridge_cv.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -2
- snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -2
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -2
- snowflake/ml/modeling/manifold/isomap.py +9 -2
- snowflake/ml/modeling/manifold/mds.py +9 -2
- snowflake/ml/modeling/manifold/spectral_embedding.py +9 -2
- snowflake/ml/modeling/manifold/tsne.py +9 -2
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -2
- snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -2
- snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -2
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -2
- snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -2
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -2
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -2
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -2
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -2
- snowflake/ml/modeling/neighbors/kernel_density.py +9 -2
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -2
- snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -2
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -2
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -2
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -2
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -2
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -2
- snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -2
- snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -2
- snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
- snowflake/ml/modeling/pipeline/pipeline.py +5 -0
- snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
- snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +10 -2
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +8 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -2
- snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -2
- snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -2
- snowflake/ml/modeling/svm/linear_svc.py +9 -2
- snowflake/ml/modeling/svm/linear_svr.py +9 -2
- snowflake/ml/modeling/svm/nu_svc.py +9 -2
- snowflake/ml/modeling/svm/nu_svr.py +9 -2
- snowflake/ml/modeling/svm/svc.py +9 -2
- snowflake/ml/modeling/svm/svr.py +9 -2
- snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -2
- snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -2
- snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -2
- snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -2
- snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -2
- snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -2
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -2
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -2
- snowflake/ml/registry/_manager/model_manager.py +59 -1
- snowflake/ml/registry/registry.py +10 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/METADATA +32 -4
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/RECORD +207 -204
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/top_level.txt +0 -0
@@ -257,7 +257,7 @@ class CategoricalNB(BaseTransformer):
                 inspect.currentframe(), CategoricalNB.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -590,7 +590,14 @@ class CategoricalNB(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
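The first hunk above repeats across every autogenerated estimator in this release: the telemetry `custom_tags` argument now passes a plain dict literal, attached only when the estimator class was autogenerated. A minimal standalone sketch of that conditional-tag pattern; the function and parameter shapes below are stand-ins for illustration, not the library's actual telemetry API:

from typing import Any, Dict, Optional

def build_statement_params(autogenerated: bool) -> Dict[str, Any]:
    # Same shape as the hunk above: tag the call only when the estimator
    # was autogenerated, otherwise omit the tags entirely.
    custom_tags: Optional[Dict[str, bool]] = {"autogen": True} if autogenerated else None
    params: Dict[str, Any] = {"api_calls": ["Session.call"]}
    if custom_tags is not None:
        params["custom_tags"] = custom_tags
    return params

print(build_statement_params(True))   # includes {'autogen': True}
print(build_statement_params(False))  # no 'custom_tags' key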
@@ -251,7 +251,7 @@ class ComplementNB(BaseTransformer):
                 inspect.currentframe(), ComplementNB.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -584,7 +584,14 @@ class ComplementNB(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
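The second hunk, also repeated for each estimator below, changes how output column names are inferred: instead of probing the wrapped sklearn method with a raw one-row pandas frame, the code now selects only the input columns and overwrites the pandas labels with the Snowpark identifiers seen during fit. A self-contained sketch of why that rename matters, using made-up column names (plain pandas, no Snowpark session required):

import pandas as pd

# Stand-in for dataset.select(self.input_cols).limit(1).to_pandas():
# pandas labels can disagree in casing/quoting with the Snowflake
# identifiers the estimator saw during fit.
sample_pd_df = pd.DataFrame({"my_col": [1.0], "OTHER": [2.0]})

# Stand-in for dataset.select(self.input_cols).columns: the identifiers,
# in fit order (the values here are assumptions for illustration).
snowpark_column_names = ['"my_col"', "OTHER"]

# The fix: align the probe frame's labels with the fitted estimator's
# expectations before calling getattr(self, method)(...).
sample_pd_df.columns = snowpark_column_names
print(sample_pd_df.columns.tolist())  # ['"my_col"', 'OTHER']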
@@ -232,7 +232,7 @@ class GaussianNB(BaseTransformer):
                 inspect.currentframe(), GaussianNB.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -565,7 +565,14 @@ class GaussianNB(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -245,7 +245,7 @@ class MultinomialNB(BaseTransformer):
                 inspect.currentframe(), MultinomialNB.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -578,7 +578,14 @@ class MultinomialNB(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -302,7 +302,7 @@ class KNeighborsClassifier(BaseTransformer):
                 inspect.currentframe(), KNeighborsClassifier.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -635,7 +635,14 @@ class KNeighborsClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -304,7 +304,7 @@ class KNeighborsRegressor(BaseTransformer):
                 inspect.currentframe(), KNeighborsRegressor.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -637,7 +637,14 @@ class KNeighborsRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -281,7 +281,7 @@ class KernelDensity(BaseTransformer):
                 inspect.currentframe(), KernelDensity.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -612,7 +612,14 @@ class KernelDensity(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -309,7 +309,7 @@ class LocalOutlierFactor(BaseTransformer):
                 inspect.currentframe(), LocalOutlierFactor.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -644,7 +644,14 @@ class LocalOutlierFactor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -242,7 +242,7 @@ class NearestCentroid(BaseTransformer):
                 inspect.currentframe(), NearestCentroid.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -575,7 +575,14 @@ class NearestCentroid(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -292,7 +292,7 @@ class NearestNeighbors(BaseTransformer):
                 inspect.currentframe(), NearestNeighbors.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -623,7 +623,14 @@ class NearestNeighbors(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -313,7 +313,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
                 inspect.currentframe(), NeighborhoodComponentsAnalysis.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -648,7 +648,14 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -314,7 +314,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
                 inspect.currentframe(), RadiusNeighborsClassifier.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -647,7 +647,14 @@ class RadiusNeighborsClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -304,7 +304,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
                 inspect.currentframe(), RadiusNeighborsRegressor.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -637,7 +637,14 @@ class RadiusNeighborsRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -261,7 +261,7 @@ class BernoulliRBM(BaseTransformer):
                 inspect.currentframe(), BernoulliRBM.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -596,7 +596,14 @@ class BernoulliRBM(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -416,7 +416,7 @@ class MLPClassifier(BaseTransformer):
                 inspect.currentframe(), MLPClassifier.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -749,7 +749,14 @@ class MLPClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -412,7 +412,7 @@ class MLPRegressor(BaseTransformer):
                 inspect.currentframe(), MLPRegressor.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns

@@ -745,7 +745,14 @@ class MLPRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -17,6 +17,7 @@ from sklearn.utils import metaestimators
 from snowflake import snowpark
 from snowflake.ml._internal import file_utils, telemetry
 from snowflake.ml._internal.exceptions import error_codes, exceptions
+from snowflake.ml._internal.lineage import lineage_utils
 from snowflake.ml._internal.utils import snowpark_dataframe_utils, temp_file_utils
 from snowflake.ml.model.model_signature import ModelSignature, _infer_signature
 from snowflake.ml.modeling._internal.model_transformer_builder import (
@@ -427,6 +428,10 @@ class Pipeline(base.BaseTransformer):
             else dataset
         )

+        # Extract lineage information here since we're overriding fit() directly
+        data_sources = lineage_utils.get_data_sources(dataset)
+        lineage_utils.set_data_sources(self, data_sources)
+
         if self._can_be_trained_in_ml_runtime(dataset):
             if not self._is_convertible_to_sklearn:
                 raise ValueError("This pipeline cannot be converted to an sklearn pipeline.")
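The two `pipeline.py` hunks above thread dataset lineage through `Pipeline.fit()`: because `fit()` is overridden directly, the data sources recorded on the input dataset must be copied onto the pipeline by hand. A simplified sketch of that hand-off; `get_data_sources` and `set_data_sources` are the names from the diff, but the attribute-based storage below is an assumption for illustration:

from typing import Any, List, Optional

_ATTR = "_data_sources"  # hypothetical storage attribute

def get_data_sources(obj: Any) -> Optional[List[str]]:
    # Read lineage previously attached to a dataset (or None).
    return getattr(obj, _ATTR, None)

def set_data_sources(obj: Any, sources: Optional[List[str]]) -> None:
    # Attach lineage to an arbitrary object, e.g. a fitted pipeline.
    if sources:
        setattr(obj, _ATTR, sources)

class Pipeline:
    def fit(self, dataset: Any) -> "Pipeline":
        # Mirror of the hunk above: copy lineage before training proceeds.
        set_data_sources(self, get_data_sources(dataset))
        return self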
@@ -25,11 +25,15 @@ class Binarizer(base.BaseTransformer):
         Feature values below or equal to this are replaced by 0, above it by 1. Default value is 0.0.

     input_cols: Optional[Union[str, Iterable[str]]], default=None
-        The name(s) of one or more columns in
+        The name(s) of one or more columns in the input DataFrame containing feature(s) to be binarized. Input
+        columns must be specified before transform with this argument or after initialization with the
+        `set_input_cols` method. This argument is optional for API consistency.

     output_cols: Optional[Union[str, Iterable[str]]], default=None
-        The name(s)
-        columns specified must
+        The name(s) to assign output columns in the output DataFrame. The number of
+        columns specified must equal the number of input columns. Output columns must be specified before transform
+        with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+        API consistency.

     passthrough_cols: Optional[Union[str, Iterable[str]]], default=None
         A string or a list of strings indicating column names to be excluded from any
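This docstring contract repeats across the preprocessing transformers below: input and output columns may be supplied at construction or later through setters, and the output column count must match the input count. A hedged usage sketch of that contract; the column names and data are made up, while `Binarizer`, `set_input_cols`, and `set_output_cols` are the names the docstrings themselves reference:

from snowflake.ml.modeling.preprocessing import Binarizer

binarizer = Binarizer(threshold=0.5)
# Columns may also be passed to the constructor; set here to show the
# setter path the docstrings describe. Counts must match one-to-one.
binarizer.set_input_cols(["FEATURE_1", "FEATURE_2"])
binarizer.set_output_cols(["FEATURE_1_BIN", "FEATURE_2_BIN"])
# result_df = binarizer.transform(df)  # df: a Snowpark or pandas DataFrame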
@@ -74,10 +74,15 @@ class KBinsDiscretizer(base.BaseTransformer):
         - 'quantile': All bins in each feature have the same number of points.

     input_cols: str or Iterable [column_name], default=None
-
+        The name(s) of one or more columns in the input DataFrame containing feature(s) to be discretized.
+        Input columns must be specified before fit with this argument or after initialization with the
+        `set_input_cols` method. This argument is optional for API consistency.

     output_cols: str or Iterable [column_name], default=None
-
+        The name(s) to assign output columns in the output DataFrame. The number of
+        columns specified must equal the number of input columns. Output columns must be specified before transform
+        with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+        API consistency.

     passthrough_cols: A string or a list of strings indicating column names to be excluded from any
         operations (such as train, transform, or inference). These specified column(s)
@@ -25,11 +25,12 @@ class LabelEncoder(base.BaseTransformer):

     Args:
         input_cols: Optional[Union[str, List[str]]]
-            The name of a column
+            The name of a column or a list containing one column name to be encoded in the input DataFrame. There must
+            be exactly one input column specified before fit. This argument is optional for API consistency.

         output_cols: Optional[Union[str, List[str]]]
-            The name of a column
-
+            The name of a column or a list containing one column name where the results will be stored. There must be
+            exactly one output column specified before transform. This argument is optional for API consistency.

         passthrough_cols: Optional[Union[str, List[str]]]
            A string or a list of strings indicating column names to be excluded from any
@@ -54,11 +55,11 @@ class LabelEncoder(base.BaseTransformer):

        Args:
            input_cols: Optional[Union[str, List[str]]]
-               The name of a column
-
+               The name of a column or a list containing one column name to be encoded in the input DataFrame. There
+               must be exactly one input column specified before fit. This argument is optional for API consistency.
            output_cols: Optional[Union[str, List[str]]]
-               The name of a column
-
+               The name of a column or a list containing one column name where the results will be stored. There must
+               be exactly one output column specified before transform. This argument is optional for API consistency.
            passthrough_cols: Optional[Union[str, List[str]]]
               A string or a list of strings indicating column names to be excluded from any
               operations (such as train, transform, or inference). These specified column(s)
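Unlike the multi-column transformers, `LabelEncoder`'s clarified docstring pins the cardinality to exactly one input and one output column. A brief sketch under that constraint; the column names and commented calls are hypothetical:

from snowflake.ml.modeling.preprocessing import LabelEncoder

# Exactly one input column and one output column, per the docstring above.
encoder = LabelEncoder(input_cols=["CITY"], output_cols=["CITY_ENCODED"])
# encoder.fit(df)                 # df: a Snowpark or pandas DataFrame
# encoded_df = encoder.transform(df)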
@@ -28,11 +28,15 @@ class MaxAbsScaler(base.BaseTransformer):

     Args:
         input_cols: Optional[Union[str, List[str]]], default=None
-            The name(s) of one or more columns in
+            The name(s) of one or more columns in the input DataFrame containing feature(s) to be scaled. Input
+            columns must be specified before fit with this argument or after initialization with the
+            `set_input_cols` method. This argument is optional for API consistency.

         output_cols: Optional[Union[str, List[str]]], default=None
-            The name(s)
-            columns specified must
+            The name(s) to assign output columns in the output DataFrame. The number of
+            columns specified must equal the number of input columns. Output columns must be specified before transform
+            with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+            API consistency.

         passthrough_cols: Optional[Union[str, List[str]]], default=None
             A string or a list of strings indicating column names to be excluded from any
@@ -29,12 +29,15 @@ class MinMaxScaler(base.BaseTransformer):
         Whether to clip transformed values of held-out data to the specified feature range (default is True).

     input_cols: Optional[Union[str, List[str]]], default=None
-        The name(s) of one or more columns in
-
+        The name(s) of one or more columns in the input DataFrame containing feature(s) to be scaled. Input
+        columns must be specified before fit with this argument or after initialization with the
+        `set_input_cols` method. This argument is optional for API consistency.

     output_cols: Optional[Union[str, List[str]]], default=None
-        The name(s)
-        columns specified must
+        The name(s) to assign output columns in the output DataFrame. The number of
+        columns specified must equal the number of input columns. Output columns must be specified before transform
+        with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+        API consistency.

     passthrough_cols: Optional[Union[str, List[str]]], default=None
         A string or a list of strings indicating column names to be excluded from any
@@ -28,11 +28,15 @@ class Normalizer(base.BaseTransformer):
         values. It must be one of 'l1', 'l2', or 'max'.

     input_cols: Optional[Union[str, List[str]]]
-
+        The name(s) of one or more columns in the input DataFrame containing feature(s) to be normalized. Input
+        columns must be specified before transform with this argument or after initialization with the
+        `set_input_cols` method. This argument is optional for API consistency.

     output_cols: Optional[Union[str, List[str]]]
-
-
+        The name(s) to assign output columns in the output DataFrame. The number of
+        columns specified must equal the number of input columns. Output columns must be specified before transform
+        with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+        API consistency.

     passthrough_cols: Optional[Union[str, List[str]]]
         A string or a list of strings indicating column names to be excluded from any
@@ -157,10 +157,18 @@ class OneHotEncoder(base.BaseTransformer):
         there is no limit to the number of output features.

     input_cols: Optional[Union[str, List[str]]], default=None
-
+        The name(s) of one or more columns in the input DataFrame containing feature(s) to be encoded. Input
+        columns must be specified before fit with this argument or after initialization with the
+        `set_input_cols` method. This argument is optional for API consistency.

     output_cols: Optional[Union[str, List[str]]], default=None
-
+        The prefix to be used for encoded output for each input column. The number of
+        output column prefixes specified must match the number of input columns. Output column prefixes must be
+        specified before transform with this argument or after initialization with the `set_output_cols` method.
+
+        Note: Dense output column names are case-sensitive and resolve identifiers following Snowflake rules, e.g.
+        `"PREFIX_a"`, `PREFIX_A`, `"prefix_A"`. Therefore, there is no need to provide double-quoted column names
+        as that would result in invalid identifiers.

     passthrough_cols: Optional[Union[str, List[str]]]
         A string or a list of strings indicating column names to be excluded from any
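The note appended to the `OneHotEncoder` docstring rests on standard Snowflake identifier resolution: unquoted identifiers fold to upper case while double-quoted ones are preserved verbatim, so `"PREFIX_a"`, `PREFIX_A`, and `"prefix_A"` resolve to distinct column names. A small sketch of that folding rule, simplified from Snowflake's full identifier grammar:

def resolve_identifier(name: str) -> str:
    # Quoted identifiers keep their exact case; unquoted ones fold to upper.
    if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
        return name[1:-1]
    return name.upper()

for raw in ['"PREFIX_a"', "PREFIX_A", '"prefix_A"']:
    print(f"{raw!r} -> {resolve_identifier(raw)!r}")
# '"PREFIX_a"' -> 'PREFIX_a'
# 'PREFIX_A'   -> 'PREFIX_A'
# '"prefix_A"' -> 'prefix_A'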