snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250)
  1. snowflake/cortex/__init__.py +2 -1
  2. snowflake/cortex/_complete.py +240 -16
  3. snowflake/cortex/_extract_answer.py +0 -1
  4. snowflake/cortex/_sentiment.py +0 -1
  5. snowflake/cortex/_sse_client.py +81 -0
  6. snowflake/cortex/_summarize.py +0 -1
  7. snowflake/cortex/_translate.py +0 -1
  8. snowflake/cortex/_util.py +34 -10
  9. snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
  10. snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
  11. snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
  12. snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
  13. snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
  14. snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
  15. snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
  16. snowflake/ml/_internal/telemetry.py +26 -0
  17. snowflake/ml/_internal/utils/identifier.py +14 -0
  18. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
  19. snowflake/ml/dataset/dataset.py +54 -32
  20. snowflake/ml/dataset/dataset_factory.py +3 -4
  21. snowflake/ml/feature_store/feature_store.py +440 -243
  22. snowflake/ml/feature_store/feature_view.py +61 -9
  23. snowflake/ml/fileset/embedded_stage_fs.py +25 -21
  24. snowflake/ml/fileset/fileset.py +2 -2
  25. snowflake/ml/fileset/snowfs.py +4 -15
  26. snowflake/ml/fileset/stage_fs.py +6 -8
  27. snowflake/ml/lineage/__init__.py +3 -0
  28. snowflake/ml/lineage/lineage_node.py +139 -0
  29. snowflake/ml/model/_client/model/model_impl.py +47 -14
  30. snowflake/ml/model/_client/model/model_version_impl.py +82 -2
  31. snowflake/ml/model/_client/ops/model_ops.py +77 -5
  32. snowflake/ml/model/_client/sql/model.py +1 -0
  33. snowflake/ml/model/_client/sql/model_version.py +47 -4
  34. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
  35. snowflake/ml/model/_model_composer/model_composer.py +7 -6
  36. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +7 -1
  37. snowflake/ml/model/_model_composer/model_method/function_generator.py +17 -1
  38. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +79 -0
  39. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +5 -3
  40. snowflake/ml/model/_model_composer/model_method/model_method.py +5 -5
  41. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  42. snowflake/ml/model/_packager/model_handlers/_utils.py +1 -0
  43. snowflake/ml/model/_packager/model_handlers/catboost.py +2 -2
  44. snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
  45. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
  46. snowflake/ml/model/_packager/model_handlers/lightgbm.py +2 -2
  47. snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
  48. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
  49. snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
  50. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
  51. snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
  52. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
  53. snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
  54. snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
  55. snowflake/ml/model/_packager/model_handlers/xgboost.py +2 -2
  56. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  57. snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
  58. snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
  59. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  60. snowflake/ml/model/_packager/model_packager.py +9 -4
  61. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  62. snowflake/ml/model/_signatures/builtins_handler.py +2 -1
  63. snowflake/ml/model/_signatures/core.py +13 -1
  64. snowflake/ml/model/_signatures/pandas_handler.py +2 -0
  65. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  66. snowflake/ml/model/custom_model.py +22 -2
  67. snowflake/ml/model/model_signature.py +2 -0
  68. snowflake/ml/model/type_hints.py +74 -4
  69. snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
  70. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +158 -121
  71. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +2 -0
  72. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +39 -18
  73. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +88 -134
  74. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +22 -17
  75. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
  76. snowflake/ml/modeling/cluster/affinity_propagation.py +5 -3
  77. snowflake/ml/modeling/cluster/agglomerative_clustering.py +5 -3
  78. snowflake/ml/modeling/cluster/birch.py +5 -3
  79. snowflake/ml/modeling/cluster/bisecting_k_means.py +5 -3
  80. snowflake/ml/modeling/cluster/dbscan.py +5 -3
  81. snowflake/ml/modeling/cluster/feature_agglomeration.py +5 -3
  82. snowflake/ml/modeling/cluster/k_means.py +5 -3
  83. snowflake/ml/modeling/cluster/mean_shift.py +5 -3
  84. snowflake/ml/modeling/cluster/mini_batch_k_means.py +5 -3
  85. snowflake/ml/modeling/cluster/optics.py +5 -3
  86. snowflake/ml/modeling/cluster/spectral_biclustering.py +5 -3
  87. snowflake/ml/modeling/cluster/spectral_clustering.py +5 -3
  88. snowflake/ml/modeling/cluster/spectral_coclustering.py +5 -3
  89. snowflake/ml/modeling/compose/column_transformer.py +5 -3
  90. snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
  91. snowflake/ml/modeling/covariance/elliptic_envelope.py +5 -3
  92. snowflake/ml/modeling/covariance/empirical_covariance.py +5 -3
  93. snowflake/ml/modeling/covariance/graphical_lasso.py +5 -3
  94. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +5 -3
  95. snowflake/ml/modeling/covariance/ledoit_wolf.py +5 -3
  96. snowflake/ml/modeling/covariance/min_cov_det.py +5 -3
  97. snowflake/ml/modeling/covariance/oas.py +5 -3
  98. snowflake/ml/modeling/covariance/shrunk_covariance.py +5 -3
  99. snowflake/ml/modeling/decomposition/dictionary_learning.py +5 -3
  100. snowflake/ml/modeling/decomposition/factor_analysis.py +5 -3
  101. snowflake/ml/modeling/decomposition/fast_ica.py +5 -3
  102. snowflake/ml/modeling/decomposition/incremental_pca.py +5 -3
  103. snowflake/ml/modeling/decomposition/kernel_pca.py +5 -3
  104. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -3
  105. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -3
  106. snowflake/ml/modeling/decomposition/pca.py +5 -3
  107. snowflake/ml/modeling/decomposition/sparse_pca.py +5 -3
  108. snowflake/ml/modeling/decomposition/truncated_svd.py +5 -3
  109. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
  110. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
  111. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
  112. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
  113. snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
  114. snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
  115. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
  116. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
  117. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
  118. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
  119. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
  120. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
  121. snowflake/ml/modeling/ensemble/isolation_forest.py +5 -3
  122. snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
  123. snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
  124. snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
  125. snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
  126. snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
  127. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
  128. snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
  129. snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
  130. snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
  131. snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
  132. snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
  133. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
  134. snowflake/ml/modeling/feature_selection/variance_threshold.py +5 -3
  135. snowflake/ml/modeling/framework/base.py +3 -8
  136. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
  137. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
  138. snowflake/ml/modeling/impute/iterative_imputer.py +5 -3
  139. snowflake/ml/modeling/impute/knn_imputer.py +5 -3
  140. snowflake/ml/modeling/impute/missing_indicator.py +5 -3
  141. snowflake/ml/modeling/impute/simple_imputer.py +8 -4
  142. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +5 -3
  143. snowflake/ml/modeling/kernel_approximation/nystroem.py +5 -3
  144. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +5 -3
  145. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +5 -3
  146. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +5 -3
  147. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
  148. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
  149. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
  150. snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
  151. snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
  152. snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
  153. snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
  154. snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
  155. snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
  156. snowflake/ml/modeling/linear_model/lars.py +1 -1
  157. snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
  158. snowflake/ml/modeling/linear_model/lasso.py +1 -1
  159. snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
  160. snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
  161. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
  162. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
  163. snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
  164. snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
  165. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
  166. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
  167. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
  168. snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
  169. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
  170. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
  171. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
  172. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
  173. snowflake/ml/modeling/linear_model/perceptron.py +1 -1
  174. snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
  175. snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
  176. snowflake/ml/modeling/linear_model/ridge.py +1 -1
  177. snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
  178. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
  179. snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
  180. snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
  181. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -3
  182. snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
  183. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
  184. snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
  185. snowflake/ml/modeling/manifold/isomap.py +5 -3
  186. snowflake/ml/modeling/manifold/mds.py +5 -3
  187. snowflake/ml/modeling/manifold/spectral_embedding.py +5 -3
  188. snowflake/ml/modeling/manifold/tsne.py +5 -3
  189. snowflake/ml/modeling/metrics/ranking.py +3 -0
  190. snowflake/ml/modeling/metrics/regression.py +3 -0
  191. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +5 -3
  192. snowflake/ml/modeling/mixture/gaussian_mixture.py +5 -3
  193. snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
  194. snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
  195. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
  196. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
  197. snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
  198. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
  199. snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
  200. snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
  201. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
  202. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
  203. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
  204. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
  205. snowflake/ml/modeling/neighbors/kernel_density.py +5 -3
  206. snowflake/ml/modeling/neighbors/local_outlier_factor.py +5 -3
  207. snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
  208. snowflake/ml/modeling/neighbors/nearest_neighbors.py +5 -3
  209. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
  210. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
  211. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
  212. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +5 -3
  213. snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
  214. snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
  215. snowflake/ml/modeling/pipeline/pipeline.py +6 -0
  216. snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
  217. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
  218. snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
  219. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
  220. snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
  221. snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
  222. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +53 -11
  223. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +44 -13
  224. snowflake/ml/modeling/preprocessing/polynomial_features.py +5 -3
  225. snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
  226. snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
  227. snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
  228. snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
  229. snowflake/ml/modeling/svm/linear_svc.py +1 -1
  230. snowflake/ml/modeling/svm/linear_svr.py +1 -1
  231. snowflake/ml/modeling/svm/nu_svc.py +1 -1
  232. snowflake/ml/modeling/svm/nu_svr.py +1 -1
  233. snowflake/ml/modeling/svm/svc.py +1 -1
  234. snowflake/ml/modeling/svm/svr.py +1 -1
  235. snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
  236. snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
  237. snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
  238. snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
  239. snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
  240. snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
  241. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
  242. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
  243. snowflake/ml/registry/_manager/model_manager.py +16 -3
  244. snowflake/ml/version.py +1 -1
  245. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/METADATA +51 -7
  246. snowflake_ml_python-1.5.4.dist-info/RECORD +389 -0
  247. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/WHEEL +1 -1
  248. snowflake_ml_python-1.5.2.dist-info/RECORD +0 -384
  249. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/LICENSE.txt +0 -0
  250. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/top_level.txt +0 -0
@@ -383,7 +383,7 @@ class SGDRegressor(BaseTransformer):
383
383
  inspect.currentframe(), SGDRegressor.__class__.__name__
384
384
  ),
385
385
  api_calls=[Session.call],
386
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
386
+ custom_tags={"autogen": True} if self._autogenerated else None,
387
387
  )
388
388
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
389
389
  pd_df.columns = dataset.columns
@@ -285,7 +285,7 @@ class TheilSenRegressor(BaseTransformer):
285
285
  inspect.currentframe(), TheilSenRegressor.__class__.__name__
286
286
  ),
287
287
  api_calls=[Session.call],
288
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
288
+ custom_tags={"autogen": True} if self._autogenerated else None,
289
289
  )
290
290
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
291
291
  pd_df.columns = dataset.columns
@@ -311,7 +311,7 @@ class TweedieRegressor(BaseTransformer):
311
311
  inspect.currentframe(), TweedieRegressor.__class__.__name__
312
312
  ),
313
313
  api_calls=[Session.call],
314
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
314
+ custom_tags={"autogen": True} if self._autogenerated else None,
315
315
  )
316
316
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
317
317
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class Isomap(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -307,7 +309,7 @@ class Isomap(BaseTransformer):
307
309
  inspect.currentframe(), Isomap.__class__.__name__
308
310
  ),
309
311
  api_calls=[Session.call],
310
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
312
+ custom_tags={"autogen": True} if self._autogenerated else None,
311
313
  )
312
314
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
313
315
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class MDS(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -290,7 +292,7 @@ class MDS(BaseTransformer):
290
292
  inspect.currentframe(), MDS.__class__.__name__
291
293
  ),
292
294
  api_calls=[Session.call],
293
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
295
+ custom_tags={"autogen": True} if self._autogenerated else None,
294
296
  )
295
297
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
296
298
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class SpectralEmbedding(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -292,7 +294,7 @@ class SpectralEmbedding(BaseTransformer):
292
294
  inspect.currentframe(), SpectralEmbedding.__class__.__name__
293
295
  ),
294
296
  api_calls=[Session.call],
295
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
297
+ custom_tags={"autogen": True} if self._autogenerated else None,
296
298
  )
297
299
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
298
300
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class TSNE(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -351,7 +353,7 @@ class TSNE(BaseTransformer):
351
353
  inspect.currentframe(), TSNE.__class__.__name__
352
354
  ),
353
355
  api_calls=[Session.call],
354
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
356
+ custom_tags={"autogen": True} if self._autogenerated else None,
355
357
  )
356
358
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
357
359
  pd_df.columns = dataset.columns
@@ -102,6 +102,7 @@ def precision_recall_curve(
102
102
  ],
103
103
  statement_params=statement_params,
104
104
  anonymous=True,
105
+ execute_as="caller",
105
106
  )
106
107
  def precision_recall_curve_anon_sproc(session: snowpark.Session) -> bytes:
107
108
  for query in queries[:-1]:
@@ -249,6 +250,7 @@ def roc_auc_score(
249
250
  ],
250
251
  statement_params=statement_params,
251
252
  anonymous=True,
253
+ execute_as="caller",
252
254
  )
253
255
  def roc_auc_score_anon_sproc(session: snowpark.Session) -> bytes:
254
256
  for query in queries[:-1]:
@@ -352,6 +354,7 @@ def roc_curve(
352
354
  ],
353
355
  statement_params=statement_params,
354
356
  anonymous=True,
357
+ execute_as="caller",
355
358
  )
356
359
  def roc_curve_anon_sproc(session: snowpark.Session) -> bytes:
357
360
  for query in queries[:-1]:
@@ -87,6 +87,7 @@ def d2_absolute_error_score(
87
87
  ],
88
88
  statement_params=statement_params,
89
89
  anonymous=True,
90
+ execute_as="caller",
90
91
  )
91
92
  def d2_absolute_error_score_anon_sproc(session: snowpark.Session) -> bytes:
92
93
  for query in queries[:-1]:
@@ -184,6 +185,7 @@ def d2_pinball_score(
184
185
  ],
185
186
  statement_params=statement_params,
186
187
  anonymous=True,
188
+ execute_as="caller",
187
189
  )
188
190
  def d2_pinball_score_anon_sproc(session: snowpark.Session) -> bytes:
189
191
  for query in queries[:-1]:
@@ -299,6 +301,7 @@ def explained_variance_score(
299
301
  ],
300
302
  statement_params=statement_params,
301
303
  anonymous=True,
304
+ execute_as="caller",
302
305
  )
303
306
  def explained_variance_score_anon_sproc(session: snowpark.Session) -> bytes:
304
307
  for query in queries[:-1]:
@@ -76,8 +76,10 @@ class BayesianGaussianMixture(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -354,7 +356,7 @@ class BayesianGaussianMixture(BaseTransformer):
354
356
  inspect.currentframe(), BayesianGaussianMixture.__class__.__name__
355
357
  ),
356
358
  api_calls=[Session.call],
357
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
359
+ custom_tags={"autogen": True} if self._autogenerated else None,
358
360
  )
359
361
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
360
362
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class GaussianMixture(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -327,7 +329,7 @@ class GaussianMixture(BaseTransformer):
327
329
  inspect.currentframe(), GaussianMixture.__class__.__name__
328
330
  ),
329
331
  api_calls=[Session.call],
330
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
332
+ custom_tags={"autogen": True} if self._autogenerated else None,
331
333
  )
332
334
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
333
335
  pd_df.columns = dataset.columns
@@ -285,11 +285,7 @@ class GridSearchCV(BaseTransformer):
285
285
  )
286
286
  return selected_cols
287
287
 
288
- @telemetry.send_api_usage_telemetry(
289
- project=_PROJECT,
290
- subproject=_SUBPROJECT,
291
- )
292
- def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
288
+ def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GridSearchCV":
293
289
  """Run fit with all sets of parameters
294
290
  For more details on this function, see [sklearn.model_selection.GridSearchCV.fit]
295
291
  (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV.fit)
@@ -298,11 +298,7 @@ class RandomizedSearchCV(BaseTransformer):
298
298
  )
299
299
  return selected_cols
300
300
 
301
- @telemetry.send_api_usage_telemetry(
302
- project=_PROJECT,
303
- subproject=_SUBPROJECT,
304
- )
305
- def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
301
+ def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RandomizedSearchCV":
306
302
  """Run fit with all sets of parameters
307
303
  For more details on this function, see [sklearn.model_selection.RandomizedSearchCV.fit]
308
304
  (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV.fit)
@@ -239,7 +239,7 @@ class OneVsOneClassifier(BaseTransformer):
239
239
  inspect.currentframe(), OneVsOneClassifier.__class__.__name__
240
240
  ),
241
241
  api_calls=[Session.call],
242
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
242
+ custom_tags={"autogen": True} if self._autogenerated else None,
243
243
  )
244
244
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
245
245
  pd_df.columns = dataset.columns
@@ -248,7 +248,7 @@ class OneVsRestClassifier(BaseTransformer):
248
248
  inspect.currentframe(), OneVsRestClassifier.__class__.__name__
249
249
  ),
250
250
  api_calls=[Session.call],
251
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
251
+ custom_tags={"autogen": True} if self._autogenerated else None,
252
252
  )
253
253
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
254
254
  pd_df.columns = dataset.columns
@@ -251,7 +251,7 @@ class OutputCodeClassifier(BaseTransformer):
251
251
  inspect.currentframe(), OutputCodeClassifier.__class__.__name__
252
252
  ),
253
253
  api_calls=[Session.call],
254
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
254
+ custom_tags={"autogen": True} if self._autogenerated else None,
255
255
  )
256
256
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
257
257
  pd_df.columns = dataset.columns
@@ -251,7 +251,7 @@ class BernoulliNB(BaseTransformer):
251
251
  inspect.currentframe(), BernoulliNB.__class__.__name__
252
252
  ),
253
253
  api_calls=[Session.call],
254
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
254
+ custom_tags={"autogen": True} if self._autogenerated else None,
255
255
  )
256
256
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
257
257
  pd_df.columns = dataset.columns
@@ -257,7 +257,7 @@ class CategoricalNB(BaseTransformer):
257
257
  inspect.currentframe(), CategoricalNB.__class__.__name__
258
258
  ),
259
259
  api_calls=[Session.call],
260
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
260
+ custom_tags={"autogen": True} if self._autogenerated else None,
261
261
  )
262
262
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
263
263
  pd_df.columns = dataset.columns
@@ -251,7 +251,7 @@ class ComplementNB(BaseTransformer):
251
251
  inspect.currentframe(), ComplementNB.__class__.__name__
252
252
  ),
253
253
  api_calls=[Session.call],
254
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
254
+ custom_tags={"autogen": True} if self._autogenerated else None,
255
255
  )
256
256
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
257
257
  pd_df.columns = dataset.columns
@@ -232,7 +232,7 @@ class GaussianNB(BaseTransformer):
232
232
  inspect.currentframe(), GaussianNB.__class__.__name__
233
233
  ),
234
234
  api_calls=[Session.call],
235
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
235
+ custom_tags={"autogen": True} if self._autogenerated else None,
236
236
  )
237
237
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
238
238
  pd_df.columns = dataset.columns
@@ -245,7 +245,7 @@ class MultinomialNB(BaseTransformer):
245
245
  inspect.currentframe(), MultinomialNB.__class__.__name__
246
246
  ),
247
247
  api_calls=[Session.call],
248
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
248
+ custom_tags={"autogen": True} if self._autogenerated else None,
249
249
  )
250
250
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
251
251
  pd_df.columns = dataset.columns
@@ -302,7 +302,7 @@ class KNeighborsClassifier(BaseTransformer):
302
302
  inspect.currentframe(), KNeighborsClassifier.__class__.__name__
303
303
  ),
304
304
  api_calls=[Session.call],
305
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
305
+ custom_tags={"autogen": True} if self._autogenerated else None,
306
306
  )
307
307
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
308
308
  pd_df.columns = dataset.columns
@@ -304,7 +304,7 @@ class KNeighborsRegressor(BaseTransformer):
304
304
  inspect.currentframe(), KNeighborsRegressor.__class__.__name__
305
305
  ),
306
306
  api_calls=[Session.call],
307
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
307
+ custom_tags={"autogen": True} if self._autogenerated else None,
308
308
  )
309
309
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
310
310
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class KernelDensity(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -281,7 +283,7 @@ class KernelDensity(BaseTransformer):
281
283
  inspect.currentframe(), KernelDensity.__class__.__name__
282
284
  ),
283
285
  api_calls=[Session.call],
284
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
286
+ custom_tags={"autogen": True} if self._autogenerated else None,
285
287
  )
286
288
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
287
289
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class LocalOutlierFactor(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -309,7 +311,7 @@ class LocalOutlierFactor(BaseTransformer):
309
311
  inspect.currentframe(), LocalOutlierFactor.__class__.__name__
310
312
  ),
311
313
  api_calls=[Session.call],
312
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
314
+ custom_tags={"autogen": True} if self._autogenerated else None,
313
315
  )
314
316
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
315
317
  pd_df.columns = dataset.columns
@@ -242,7 +242,7 @@ class NearestCentroid(BaseTransformer):
242
242
  inspect.currentframe(), NearestCentroid.__class__.__name__
243
243
  ),
244
244
  api_calls=[Session.call],
245
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
245
+ custom_tags={"autogen": True} if self._autogenerated else None,
246
246
  )
247
247
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
248
248
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class NearestNeighbors(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -292,7 +294,7 @@ class NearestNeighbors(BaseTransformer):
292
294
  inspect.currentframe(), NearestNeighbors.__class__.__name__
293
295
  ),
294
296
  api_calls=[Session.call],
295
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
297
+ custom_tags={"autogen": True} if self._autogenerated else None,
296
298
  )
297
299
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
298
300
  pd_df.columns = dataset.columns
@@ -313,7 +313,7 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
313
313
  inspect.currentframe(), NeighborhoodComponentsAnalysis.__class__.__name__
314
314
  ),
315
315
  api_calls=[Session.call],
316
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
316
+ custom_tags={"autogen": True} if self._autogenerated else None,
317
317
  )
318
318
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
319
319
  pd_df.columns = dataset.columns
@@ -314,7 +314,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
314
314
  inspect.currentframe(), RadiusNeighborsClassifier.__class__.__name__
315
315
  ),
316
316
  api_calls=[Session.call],
317
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
317
+ custom_tags={"autogen": True} if self._autogenerated else None,
318
318
  )
319
319
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
320
320
  pd_df.columns = dataset.columns
@@ -304,7 +304,7 @@ class RadiusNeighborsRegressor(BaseTransformer):
304
304
  inspect.currentframe(), RadiusNeighborsRegressor.__class__.__name__
305
305
  ),
306
306
  api_calls=[Session.call],
307
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
307
+ custom_tags={"autogen": True} if self._autogenerated else None,
308
308
  )
309
309
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
310
310
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class BernoulliRBM(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -261,7 +263,7 @@ class BernoulliRBM(BaseTransformer):
261
263
  inspect.currentframe(), BernoulliRBM.__class__.__name__
262
264
  ),
263
265
  api_calls=[Session.call],
264
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
266
+ custom_tags={"autogen": True} if self._autogenerated else None,
265
267
  )
266
268
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
267
269
  pd_df.columns = dataset.columns
@@ -416,7 +416,7 @@ class MLPClassifier(BaseTransformer):
416
416
  inspect.currentframe(), MLPClassifier.__class__.__name__
417
417
  ),
418
418
  api_calls=[Session.call],
419
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
419
+ custom_tags={"autogen": True} if self._autogenerated else None,
420
420
  )
421
421
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
422
422
  pd_df.columns = dataset.columns
@@ -412,7 +412,7 @@ class MLPRegressor(BaseTransformer):
412
412
  inspect.currentframe(), MLPRegressor.__class__.__name__
413
413
  ),
414
414
  api_calls=[Session.call],
415
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
415
+ custom_tags={"autogen": True} if self._autogenerated else None,
416
416
  )
417
417
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
418
418
  pd_df.columns = dataset.columns
@@ -17,6 +17,7 @@ from sklearn.utils import metaestimators
17
17
  from snowflake import snowpark
18
18
  from snowflake.ml._internal import file_utils, telemetry
19
19
  from snowflake.ml._internal.exceptions import error_codes, exceptions
20
+ from snowflake.ml._internal.lineage import lineage_utils
20
21
  from snowflake.ml._internal.utils import snowpark_dataframe_utils, temp_file_utils
21
22
  from snowflake.ml.model.model_signature import ModelSignature, _infer_signature
22
23
  from snowflake.ml.modeling._internal.model_transformer_builder import (
@@ -377,6 +378,7 @@ class Pipeline(base.BaseTransformer):
377
378
  anonymous=True,
378
379
  imports=imports, # type: ignore[arg-type]
379
380
  statement_params=sproc_statement_params,
381
+ execute_as="caller",
380
382
  )
381
383
 
382
384
  sproc_export_file_name: str = pipeline_within_one_sproc(
@@ -427,6 +429,10 @@ class Pipeline(base.BaseTransformer):
427
429
  else dataset
428
430
  )
429
431
 
432
+ # Extract lineage information here since we're overriding fit() directly
433
+ data_sources = lineage_utils.get_data_sources(dataset)
434
+ lineage_utils.set_data_sources(self, data_sources)
435
+
430
436
  if self._can_be_trained_in_ml_runtime(dataset):
431
437
  if not self._is_convertible_to_sklearn:
432
438
  raise ValueError("This pipeline cannot be converted to an sklearn pipeline.")
@@ -25,11 +25,15 @@ class Binarizer(base.BaseTransformer):
25
25
  Feature values below or equal to this are replaced by 0, above it by 1. Default value is 0.0.
26
26
 
27
27
  input_cols: Optional[Union[str, Iterable[str]]], default=None
28
- The name(s) of one or more columns in a DataFrame containing a feature to be binarized.
28
+ The name(s) of one or more columns in the input DataFrame containing feature(s) to be binarized. Input
29
+ columns must be specified before transform with this argument or after initialization with the
30
+ `set_input_cols` method. This argument is optional for API consistency.
29
31
 
30
32
  output_cols: Optional[Union[str, Iterable[str]]], default=None
31
- The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
32
- columns specified must match the number of input columns.
33
+ The name(s) to assign output columns in the output DataFrame. The number of
34
+ columns specified must equal the number of input columns. Output columns must be specified before transform
35
+ with this argument or after initialization with the `set_output_cols` method. This argument is optional for
36
+ API consistency.
33
37
 
34
38
  passthrough_cols: Optional[Union[str, Iterable[str]]], default=None
35
39
  A string or a list of strings indicating column names to be excluded from any
@@ -74,10 +74,15 @@ class KBinsDiscretizer(base.BaseTransformer):
74
74
  - 'quantile': All bins in each feature have the same number of points.
75
75
 
76
76
  input_cols: str or Iterable [column_name], default=None
77
- Single or multiple input columns.
77
+ The name(s) of one or more columns in the input DataFrame containing feature(s) to be discretized.
78
+ Input columns must be specified before fit with this argument or after initialization with the
79
+ `set_input_cols` method. This argument is optional for API consistency.
78
80
 
79
81
  output_cols: str or Iterable [column_name], default=None
80
- Single or multiple output columns.
82
+ The name(s) to assign output columns in the output DataFrame. The number of
83
+ columns specified must equal the number of input columns. Output columns must be specified before transform
84
+ with this argument or after initialization with the `set_output_cols` method. This argument is optional for
85
+ API consistency.
81
86
 
82
87
  passthrough_cols: A string or a list of strings indicating column names to be excluded from any
83
88
  operations (such as train, transform, or inference). These specified column(s)
@@ -25,11 +25,12 @@ class LabelEncoder(base.BaseTransformer):
25
25
 
26
26
  Args:
27
27
  input_cols: Optional[Union[str, List[str]]]
28
- The name of a column in a DataFrame to be encoded. May be a string or a list containing one string.
28
+ The name of a column or a list containing one column name to be encoded in the input DataFrame. There must
29
+ be exactly one input column specified before fit. This argument is optional for API consistency.
29
30
 
30
31
  output_cols: Optional[Union[str, List[str]]]
31
- The name of a column in a DataFrame where the results will be stored. May be a string or a list
32
- containing one string.
32
+ The name of a column or a list containing one column name where the results will be stored. There must be
33
+ exactly one output column specified before transform. This argument is optional for API consistency.
33
34
 
34
35
  passthrough_cols: Optional[Union[str, List[str]]]
35
36
  A string or a list of strings indicating column names to be excluded from any
@@ -54,11 +55,11 @@ class LabelEncoder(base.BaseTransformer):
54
55
 
55
56
  Args:
56
57
  input_cols: Optional[Union[str, List[str]]]
57
- The name of a column in a DataFrame to be encoded. May be a string or a list containing one
58
- string.
58
+ The name of a column or a list containing one column name to be encoded in the input DataFrame. There
59
+ must be exactly one input column specified before fit. This argument is optional for API consistency.
59
60
  output_cols: Optional[Union[str, List[str]]]
60
- The name of a column in a DataFrame where the results will be stored. May be a string or a list
61
- containing one string.
61
+ The name of a column or a list containing one column name where the results will be stored. There must
62
+ be exactly one output column specified before transform. This argument is optional for API consistency.
62
63
  passthrough_cols: Optional[Union[str, List[str]]]
63
64
  A string or a list of strings indicating column names to be excluded from any
64
65
  operations (such as train, transform, or inference). These specified column(s)