snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +2 -1
- snowflake/cortex/_complete.py +240 -16
- snowflake/cortex/_extract_answer.py +0 -1
- snowflake/cortex/_sentiment.py +0 -1
- snowflake/cortex/_sse_client.py +81 -0
- snowflake/cortex/_summarize.py +0 -1
- snowflake/cortex/_translate.py +0 -1
- snowflake/cortex/_util.py +34 -10
- snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
- snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
- snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
- snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
- snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
- snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
- snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
- snowflake/ml/_internal/telemetry.py +26 -0
- snowflake/ml/_internal/utils/identifier.py +14 -0
- snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
- snowflake/ml/dataset/dataset.py +54 -32
- snowflake/ml/dataset/dataset_factory.py +3 -4
- snowflake/ml/feature_store/feature_store.py +440 -243
- snowflake/ml/feature_store/feature_view.py +61 -9
- snowflake/ml/fileset/embedded_stage_fs.py +25 -21
- snowflake/ml/fileset/fileset.py +2 -2
- snowflake/ml/fileset/snowfs.py +4 -15
- snowflake/ml/fileset/stage_fs.py +6 -8
- snowflake/ml/lineage/__init__.py +3 -0
- snowflake/ml/lineage/lineage_node.py +139 -0
- snowflake/ml/model/_client/model/model_impl.py +47 -14
- snowflake/ml/model/_client/model/model_version_impl.py +82 -2
- snowflake/ml/model/_client/ops/model_ops.py +77 -5
- snowflake/ml/model/_client/sql/model.py +1 -0
- snowflake/ml/model/_client/sql/model_version.py +47 -4
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
- snowflake/ml/model/_model_composer/model_composer.py +7 -6
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +7 -1
- snowflake/ml/model/_model_composer/model_method/function_generator.py +17 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +79 -0
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +5 -3
- snowflake/ml/model/_model_composer/model_method/model_method.py +5 -5
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +1 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +2 -2
- snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +2 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
- snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
- snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
- snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
- snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
- snowflake/ml/model/_packager/model_handlers/xgboost.py +2 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_packager.py +9 -4
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_signatures/builtins_handler.py +2 -1
- snowflake/ml/model/_signatures/core.py +13 -1
- snowflake/ml/model/_signatures/pandas_handler.py +2 -0
- snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
- snowflake/ml/model/custom_model.py +22 -2
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/model/type_hints.py +74 -4
- snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +158 -121
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +2 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +39 -18
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +88 -134
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +22 -17
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +5 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +5 -3
- snowflake/ml/modeling/cluster/birch.py +5 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +5 -3
- snowflake/ml/modeling/cluster/dbscan.py +5 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +5 -3
- snowflake/ml/modeling/cluster/k_means.py +5 -3
- snowflake/ml/modeling/cluster/mean_shift.py +5 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +5 -3
- snowflake/ml/modeling/cluster/optics.py +5 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +5 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +5 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +5 -3
- snowflake/ml/modeling/compose/column_transformer.py +5 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +5 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +5 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +5 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +5 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +5 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +5 -3
- snowflake/ml/modeling/covariance/oas.py +5 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +5 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +5 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +5 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +5 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +5 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +5 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -3
- snowflake/ml/modeling/decomposition/pca.py +5 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +5 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +5 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +5 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +5 -3
- snowflake/ml/modeling/framework/base.py +3 -8
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +5 -3
- snowflake/ml/modeling/impute/knn_imputer.py +5 -3
- snowflake/ml/modeling/impute/missing_indicator.py +5 -3
- snowflake/ml/modeling/impute/simple_imputer.py +8 -4
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +5 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +5 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +5 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +5 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +5 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/lars.py +1 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/perceptron.py +1 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ridge.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
- snowflake/ml/modeling/manifold/isomap.py +5 -3
- snowflake/ml/modeling/manifold/mds.py +5 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +5 -3
- snowflake/ml/modeling/manifold/tsne.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +3 -0
- snowflake/ml/modeling/metrics/regression.py +3 -0
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +5 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +5 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +5 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +5 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +5 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +5 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
- snowflake/ml/modeling/pipeline/pipeline.py +6 -0
- snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
- snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +53 -11
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +44 -13
- snowflake/ml/modeling/preprocessing/polynomial_features.py +5 -3
- snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
- snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
- snowflake/ml/modeling/svm/linear_svc.py +1 -1
- snowflake/ml/modeling/svm/linear_svr.py +1 -1
- snowflake/ml/modeling/svm/nu_svc.py +1 -1
- snowflake/ml/modeling/svm/nu_svr.py +1 -1
- snowflake/ml/modeling/svm/svc.py +1 -1
- snowflake/ml/modeling/svm/svr.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
- snowflake/ml/registry/_manager/model_manager.py +16 -3
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/METADATA +51 -7
- snowflake_ml_python-1.5.4.dist-info/RECORD +389 -0
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/WHEEL +1 -1
- snowflake_ml_python-1.5.2.dist-info/RECORD +0 -384
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/top_level.txt +0 -0
@@ -383,7 +383,7 @@ class SGDRegressor(BaseTransformer):
|
|
383
383
|
inspect.currentframe(), SGDRegressor.__class__.__name__
|
384
384
|
),
|
385
385
|
api_calls=[Session.call],
|
386
|
-
custom_tags=
|
386
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
387
387
|
)
|
388
388
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
389
389
|
pd_df.columns = dataset.columns
|
@@ -285,7 +285,7 @@ class TheilSenRegressor(BaseTransformer):
|
|
285
285
|
inspect.currentframe(), TheilSenRegressor.__class__.__name__
|
286
286
|
),
|
287
287
|
api_calls=[Session.call],
|
288
|
-
custom_tags=
|
288
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
289
289
|
)
|
290
290
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
291
291
|
pd_df.columns = dataset.columns
|
@@ -311,7 +311,7 @@ class TweedieRegressor(BaseTransformer):
|
|
311
311
|
inspect.currentframe(), TweedieRegressor.__class__.__name__
|
312
312
|
),
|
313
313
|
api_calls=[Session.call],
|
314
|
-
custom_tags=
|
314
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
315
315
|
)
|
316
316
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
317
317
|
pd_df.columns = dataset.columns
|
@@ -76,8 +76,10 @@ class Isomap(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -307,7 +309,7 @@ class Isomap(BaseTransformer):
|
|
307
309
|
inspect.currentframe(), Isomap.__class__.__name__
|
308
310
|
),
|
309
311
|
api_calls=[Session.call],
|
310
|
-
custom_tags=
|
312
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
311
313
|
)
|
312
314
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
313
315
|
pd_df.columns = dataset.columns
|
@@ -76,8 +76,10 @@ class MDS(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -290,7 +292,7 @@ class MDS(BaseTransformer):
|
|
290
292
|
inspect.currentframe(), MDS.__class__.__name__
|
291
293
|
),
|
292
294
|
api_calls=[Session.call],
|
293
|
-
custom_tags=
|
295
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
294
296
|
)
|
295
297
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
296
298
|
pd_df.columns = dataset.columns
|
@@ -76,8 +76,10 @@ class SpectralEmbedding(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -292,7 +294,7 @@ class SpectralEmbedding(BaseTransformer):
|
|
292
294
|
inspect.currentframe(), SpectralEmbedding.__class__.__name__
|
293
295
|
),
|
294
296
|
api_calls=[Session.call],
|
295
|
-
custom_tags=
|
297
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
296
298
|
)
|
297
299
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
298
300
|
pd_df.columns = dataset.columns
|
@@ -76,8 +76,10 @@ class TSNE(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -351,7 +353,7 @@ class TSNE(BaseTransformer):
|
|
351
353
|
inspect.currentframe(), TSNE.__class__.__name__
|
352
354
|
),
|
353
355
|
api_calls=[Session.call],
|
354
|
-
custom_tags=
|
356
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
355
357
|
)
|
356
358
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
357
359
|
pd_df.columns = dataset.columns
|
@@ -102,6 +102,7 @@ def precision_recall_curve(
|
|
102
102
|
],
|
103
103
|
statement_params=statement_params,
|
104
104
|
anonymous=True,
|
105
|
+
execute_as="caller",
|
105
106
|
)
|
106
107
|
def precision_recall_curve_anon_sproc(session: snowpark.Session) -> bytes:
|
107
108
|
for query in queries[:-1]:
|
@@ -249,6 +250,7 @@ def roc_auc_score(
|
|
249
250
|
],
|
250
251
|
statement_params=statement_params,
|
251
252
|
anonymous=True,
|
253
|
+
execute_as="caller",
|
252
254
|
)
|
253
255
|
def roc_auc_score_anon_sproc(session: snowpark.Session) -> bytes:
|
254
256
|
for query in queries[:-1]:
|
@@ -352,6 +354,7 @@ def roc_curve(
|
|
352
354
|
],
|
353
355
|
statement_params=statement_params,
|
354
356
|
anonymous=True,
|
357
|
+
execute_as="caller",
|
355
358
|
)
|
356
359
|
def roc_curve_anon_sproc(session: snowpark.Session) -> bytes:
|
357
360
|
for query in queries[:-1]:
|
@@ -87,6 +87,7 @@ def d2_absolute_error_score(
|
|
87
87
|
],
|
88
88
|
statement_params=statement_params,
|
89
89
|
anonymous=True,
|
90
|
+
execute_as="caller",
|
90
91
|
)
|
91
92
|
def d2_absolute_error_score_anon_sproc(session: snowpark.Session) -> bytes:
|
92
93
|
for query in queries[:-1]:
|
@@ -184,6 +185,7 @@ def d2_pinball_score(
|
|
184
185
|
],
|
185
186
|
statement_params=statement_params,
|
186
187
|
anonymous=True,
|
188
|
+
execute_as="caller",
|
187
189
|
)
|
188
190
|
def d2_pinball_score_anon_sproc(session: snowpark.Session) -> bytes:
|
189
191
|
for query in queries[:-1]:
|
@@ -299,6 +301,7 @@ def explained_variance_score(
|
|
299
301
|
],
|
300
302
|
statement_params=statement_params,
|
301
303
|
anonymous=True,
|
304
|
+
execute_as="caller",
|
302
305
|
)
|
303
306
|
def explained_variance_score_anon_sproc(session: snowpark.Session) -> bytes:
|
304
307
|
for query in queries[:-1]:
|
@@ -76,8 +76,10 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -354,7 +356,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
354
356
|
inspect.currentframe(), BayesianGaussianMixture.__class__.__name__
|
355
357
|
),
|
356
358
|
api_calls=[Session.call],
|
357
|
-
custom_tags=
|
359
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
358
360
|
)
|
359
361
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
360
362
|
pd_df.columns = dataset.columns
|
@@ -76,8 +76,10 @@ class GaussianMixture(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -327,7 +329,7 @@ class GaussianMixture(BaseTransformer):
|
|
327
329
|
inspect.currentframe(), GaussianMixture.__class__.__name__
|
328
330
|
),
|
329
331
|
api_calls=[Session.call],
|
330
|
-
custom_tags=
|
332
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
331
333
|
)
|
332
334
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
333
335
|
pd_df.columns = dataset.columns
|
@@ -285,11 +285,7 @@ class GridSearchCV(BaseTransformer):
|
|
285
285
|
)
|
286
286
|
return selected_cols
|
287
287
|
|
288
|
-
|
289
|
-
project=_PROJECT,
|
290
|
-
subproject=_SUBPROJECT,
|
291
|
-
)
|
292
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
|
288
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
|
293
289
|
"""Run fit with all sets of parameters
|
294
290
|
For more details on this function, see [sklearn.model_selection.GridSearchCV.fit]
|
295
291
|
(https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV.fit)
|
@@ -298,11 +298,7 @@ class RandomizedSearchCV(BaseTransformer):
|
|
298
298
|
)
|
299
299
|
return selected_cols
|
300
300
|
|
301
|
-
|
302
|
-
project=_PROJECT,
|
303
|
-
subproject=_SUBPROJECT,
|
304
|
-
)
|
305
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
|
301
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
|
306
302
|
"""Run fit with all sets of parameters
|
307
303
|
For more details on this function, see [sklearn.model_selection.RandomizedSearchCV.fit]
|
308
304
|
(https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV.fit)
|
@@ -239,7 +239,7 @@ class OneVsOneClassifier(BaseTransformer):
|
|
239
239
|
inspect.currentframe(), OneVsOneClassifier.__class__.__name__
|
240
240
|
),
|
241
241
|
api_calls=[Session.call],
|
242
|
-
custom_tags=
|
242
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
243
243
|
)
|
244
244
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
245
245
|
pd_df.columns = dataset.columns
|
@@ -248,7 +248,7 @@ class OneVsRestClassifier(BaseTransformer):
|
|
248
248
|
inspect.currentframe(), OneVsRestClassifier.__class__.__name__
|
249
249
|
),
|
250
250
|
api_calls=[Session.call],
|
251
|
-
custom_tags=
|
251
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
252
252
|
)
|
253
253
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
254
254
|
pd_df.columns = dataset.columns
|
@@ -251,7 +251,7 @@ class OutputCodeClassifier(BaseTransformer):
|
|
251
251
|
inspect.currentframe(), OutputCodeClassifier.__class__.__name__
|
252
252
|
),
|
253
253
|
api_calls=[Session.call],
|
254
|
-
custom_tags=
|
254
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
255
255
|
)
|
256
256
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
257
257
|
pd_df.columns = dataset.columns
|
@@ -251,7 +251,7 @@ class BernoulliNB(BaseTransformer):
|
|
251
251
|
inspect.currentframe(), BernoulliNB.__class__.__name__
|
252
252
|
),
|
253
253
|
api_calls=[Session.call],
|
254
|
-
custom_tags=
|
254
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
255
255
|
)
|
256
256
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
257
257
|
pd_df.columns = dataset.columns
|
@@ -257,7 +257,7 @@ class CategoricalNB(BaseTransformer):
|
|
257
257
|
inspect.currentframe(), CategoricalNB.__class__.__name__
|
258
258
|
),
|
259
259
|
api_calls=[Session.call],
|
260
|
-
custom_tags=
|
260
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
261
261
|
)
|
262
262
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
263
263
|
pd_df.columns = dataset.columns
|
@@ -251,7 +251,7 @@ class ComplementNB(BaseTransformer):
|
|
251
251
|
inspect.currentframe(), ComplementNB.__class__.__name__
|
252
252
|
),
|
253
253
|
api_calls=[Session.call],
|
254
|
-
custom_tags=
|
254
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
255
255
|
)
|
256
256
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
257
257
|
pd_df.columns = dataset.columns
|
@@ -232,7 +232,7 @@ class GaussianNB(BaseTransformer):
|
|
232
232
|
inspect.currentframe(), GaussianNB.__class__.__name__
|
233
233
|
),
|
234
234
|
api_calls=[Session.call],
|
235
|
-
custom_tags=
|
235
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
236
236
|
)
|
237
237
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
238
238
|
pd_df.columns = dataset.columns
|
@@ -245,7 +245,7 @@ class MultinomialNB(BaseTransformer):
|
|
245
245
|
inspect.currentframe(), MultinomialNB.__class__.__name__
|
246
246
|
),
|
247
247
|
api_calls=[Session.call],
|
248
|
-
custom_tags=
|
248
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
249
249
|
)
|
250
250
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
251
251
|
pd_df.columns = dataset.columns
|
@@ -302,7 +302,7 @@ class KNeighborsClassifier(BaseTransformer):
|
|
302
302
|
inspect.currentframe(), KNeighborsClassifier.__class__.__name__
|
303
303
|
),
|
304
304
|
api_calls=[Session.call],
|
305
|
-
custom_tags=
|
305
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
306
306
|
)
|
307
307
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
308
308
|
pd_df.columns = dataset.columns
|
@@ -304,7 +304,7 @@ class KNeighborsRegressor(BaseTransformer):
|
|
304
304
|
inspect.currentframe(), KNeighborsRegressor.__class__.__name__
|
305
305
|
),
|
306
306
|
api_calls=[Session.call],
|
307
|
-
custom_tags=
|
307
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
308
308
|
)
|
309
309
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
310
310
|
pd_df.columns = dataset.columns
|
@@ -76,8 +76,10 @@ class KernelDensity(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -281,7 +283,7 @@ class KernelDensity(BaseTransformer):
|
|
281
283
|
inspect.currentframe(), KernelDensity.__class__.__name__
|
282
284
|
),
|
283
285
|
api_calls=[Session.call],
|
284
|
-
custom_tags=
|
286
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
285
287
|
)
|
286
288
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
287
289
|
pd_df.columns = dataset.columns
|
@@ -76,8 +76,10 @@ class LocalOutlierFactor(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -309,7 +311,7 @@ class LocalOutlierFactor(BaseTransformer):
|
|
309
311
|
inspect.currentframe(), LocalOutlierFactor.__class__.__name__
|
310
312
|
),
|
311
313
|
api_calls=[Session.call],
|
312
|
-
custom_tags=
|
314
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
313
315
|
)
|
314
316
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
315
317
|
pd_df.columns = dataset.columns
|
@@ -242,7 +242,7 @@ class NearestCentroid(BaseTransformer):
|
|
242
242
|
inspect.currentframe(), NearestCentroid.__class__.__name__
|
243
243
|
),
|
244
244
|
api_calls=[Session.call],
|
245
|
-
custom_tags=
|
245
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
246
246
|
)
|
247
247
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
248
248
|
pd_df.columns = dataset.columns
|
@@ -76,8 +76,10 @@ class NearestNeighbors(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -292,7 +294,7 @@ class NearestNeighbors(BaseTransformer):
|
|
292
294
|
inspect.currentframe(), NearestNeighbors.__class__.__name__
|
293
295
|
),
|
294
296
|
api_calls=[Session.call],
|
295
|
-
custom_tags=
|
297
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
296
298
|
)
|
297
299
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
298
300
|
pd_df.columns = dataset.columns
|
@@ -313,7 +313,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
|
|
313
313
|
inspect.currentframe(), NeighborhoodComponentsAnalysis.__class__.__name__
|
314
314
|
),
|
315
315
|
api_calls=[Session.call],
|
316
|
-
custom_tags=
|
316
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
317
317
|
)
|
318
318
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
319
319
|
pd_df.columns = dataset.columns
|
@@ -314,7 +314,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
314
314
|
inspect.currentframe(), RadiusNeighborsClassifier.__class__.__name__
|
315
315
|
),
|
316
316
|
api_calls=[Session.call],
|
317
|
-
custom_tags=
|
317
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
318
318
|
)
|
319
319
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
320
320
|
pd_df.columns = dataset.columns
|
@@ -304,7 +304,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
|
|
304
304
|
inspect.currentframe(), RadiusNeighborsRegressor.__class__.__name__
|
305
305
|
),
|
306
306
|
api_calls=[Session.call],
|
307
|
-
custom_tags=
|
307
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
308
308
|
)
|
309
309
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
310
310
|
pd_df.columns = dataset.columns
|
@@ -76,8 +76,10 @@ class BernoulliRBM(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -261,7 +263,7 @@ class BernoulliRBM(BaseTransformer):
|
|
261
263
|
inspect.currentframe(), BernoulliRBM.__class__.__name__
|
262
264
|
),
|
263
265
|
api_calls=[Session.call],
|
264
|
-
custom_tags=
|
266
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
265
267
|
)
|
266
268
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
267
269
|
pd_df.columns = dataset.columns
|
@@ -416,7 +416,7 @@ class MLPClassifier(BaseTransformer):
|
|
416
416
|
inspect.currentframe(), MLPClassifier.__class__.__name__
|
417
417
|
),
|
418
418
|
api_calls=[Session.call],
|
419
|
-
custom_tags=
|
419
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
420
420
|
)
|
421
421
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
422
422
|
pd_df.columns = dataset.columns
|
@@ -412,7 +412,7 @@ class MLPRegressor(BaseTransformer):
|
|
412
412
|
inspect.currentframe(), MLPRegressor.__class__.__name__
|
413
413
|
),
|
414
414
|
api_calls=[Session.call],
|
415
|
-
custom_tags=
|
415
|
+
custom_tags={"autogen": True} if self._autogenerated else None,
|
416
416
|
)
|
417
417
|
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
418
418
|
pd_df.columns = dataset.columns
|
@@ -17,6 +17,7 @@ from sklearn.utils import metaestimators
|
|
17
17
|
from snowflake import snowpark
|
18
18
|
from snowflake.ml._internal import file_utils, telemetry
|
19
19
|
from snowflake.ml._internal.exceptions import error_codes, exceptions
|
20
|
+
from snowflake.ml._internal.lineage import lineage_utils
|
20
21
|
from snowflake.ml._internal.utils import snowpark_dataframe_utils, temp_file_utils
|
21
22
|
from snowflake.ml.model.model_signature import ModelSignature, _infer_signature
|
22
23
|
from snowflake.ml.modeling._internal.model_transformer_builder import (
|
@@ -377,6 +378,7 @@ class Pipeline(base.BaseTransformer):
|
|
377
378
|
anonymous=True,
|
378
379
|
imports=imports, # type: ignore[arg-type]
|
379
380
|
statement_params=sproc_statement_params,
|
381
|
+
execute_as="caller",
|
380
382
|
)
|
381
383
|
|
382
384
|
sproc_export_file_name: str = pipeline_within_one_sproc(
|
@@ -427,6 +429,10 @@ class Pipeline(base.BaseTransformer):
|
|
427
429
|
else dataset
|
428
430
|
)
|
429
431
|
|
432
|
+
# Extract lineage information here since we're overriding fit() directly
|
433
|
+
data_sources = lineage_utils.get_data_sources(dataset)
|
434
|
+
lineage_utils.set_data_sources(self, data_sources)
|
435
|
+
|
430
436
|
if self._can_be_trained_in_ml_runtime(dataset):
|
431
437
|
if not self._is_convertible_to_sklearn:
|
432
438
|
raise ValueError("This pipeline cannot be converted to an sklearn pipeline.")
|
@@ -25,11 +25,15 @@ class Binarizer(base.BaseTransformer):
|
|
25
25
|
Feature values below or equal to this are replaced by 0, above it by 1. Default value is 0.0.
|
26
26
|
|
27
27
|
input_cols: Optional[Union[str, Iterable[str]]], default=None
|
28
|
-
The name(s) of one or more columns in
|
28
|
+
The name(s) of one or more columns in the input DataFrame containing feature(s) to be binarized. Input
|
29
|
+
columns must be specified before transform with this argument or after initialization with the
|
30
|
+
`set_input_cols` method. This argument is optional for API consistency.
|
29
31
|
|
30
32
|
output_cols: Optional[Union[str, Iterable[str]]], default=None
|
31
|
-
The name(s)
|
32
|
-
columns specified must
|
33
|
+
The name(s) to assign output columns in the output DataFrame. The number of
|
34
|
+
columns specified must equal the number of input columns. Output columns must be specified before transform
|
35
|
+
with this argument or after initialization with the `set_output_cols` method. This argument is optional for
|
36
|
+
API consistency.
|
33
37
|
|
34
38
|
passthrough_cols: Optional[Union[str, Iterable[str]]], default=None
|
35
39
|
A string or a list of strings indicating column names to be excluded from any
|
@@ -74,10 +74,15 @@ class KBinsDiscretizer(base.BaseTransformer):
|
|
74
74
|
- 'quantile': All bins in each feature have the same number of points.
|
75
75
|
|
76
76
|
input_cols: str or Iterable [column_name], default=None
|
77
|
-
|
77
|
+
The name(s) of one or more columns in the input DataFrame containing feature(s) to be discretized.
|
78
|
+
Input columns must be specified before fit with this argument or after initialization with the
|
79
|
+
`set_input_cols` method. This argument is optional for API consistency.
|
78
80
|
|
79
81
|
output_cols: str or Iterable [column_name], default=None
|
80
|
-
|
82
|
+
The name(s) to assign output columns in the output DataFrame. The number of
|
83
|
+
columns specified must equal the number of input columns. Output columns must be specified before transform
|
84
|
+
with this argument or after initialization with the `set_output_cols` method. This argument is optional for
|
85
|
+
API consistency.
|
81
86
|
|
82
87
|
passthrough_cols: A string or a list of strings indicating column names to be excluded from any
|
83
88
|
operations (such as train, transform, or inference). These specified column(s)
|
@@ -25,11 +25,12 @@ class LabelEncoder(base.BaseTransformer):
|
|
25
25
|
|
26
26
|
Args:
|
27
27
|
input_cols: Optional[Union[str, List[str]]]
|
28
|
-
The name of a column
|
28
|
+
The name of a column or a list containing one column name to be encoded in the input DataFrame. There must
|
29
|
+
be exactly one input column specified before fit. This argument is optional for API consistency.
|
29
30
|
|
30
31
|
output_cols: Optional[Union[str, List[str]]]
|
31
|
-
The name of a column
|
32
|
-
|
32
|
+
The name of a column or a list containing one column name where the results will be stored. There must be
|
33
|
+
exactly one output column specified before transform. This argument is optional for API consistency.
|
33
34
|
|
34
35
|
passthrough_cols: Optional[Union[str, List[str]]]
|
35
36
|
A string or a list of strings indicating column names to be excluded from any
|
@@ -54,11 +55,11 @@ class LabelEncoder(base.BaseTransformer):
|
|
54
55
|
|
55
56
|
Args:
|
56
57
|
input_cols: Optional[Union[str, List[str]]]
|
57
|
-
The name of a column
|
58
|
-
|
58
|
+
The name of a column or a list containing one column name to be encoded in the input DataFrame. There
|
59
|
+
must be exactly one input column specified before fit. This argument is optional for API consistency.
|
59
60
|
output_cols: Optional[Union[str, List[str]]]
|
60
|
-
The name of a column
|
61
|
-
|
61
|
+
The name of a column or a list containing one column name where the results will be stored. There must
|
62
|
+
be exactly one output column specified before transform. This argument is optional for API consistency.
|
62
63
|
passthrough_cols: Optional[Union[str, List[str]]]
|
63
64
|
A string or a list of strings indicating column names to be excluded from any
|
64
65
|
operations (such as train, transform, or inference). These specified column(s)
|