snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250)
  1. snowflake/cortex/__init__.py +2 -1
  2. snowflake/cortex/_complete.py +240 -16
  3. snowflake/cortex/_extract_answer.py +0 -1
  4. snowflake/cortex/_sentiment.py +0 -1
  5. snowflake/cortex/_sse_client.py +81 -0
  6. snowflake/cortex/_summarize.py +0 -1
  7. snowflake/cortex/_translate.py +0 -1
  8. snowflake/cortex/_util.py +34 -10
  9. snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
  10. snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
  11. snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
  12. snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
  13. snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
  14. snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
  15. snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
  16. snowflake/ml/_internal/telemetry.py +26 -0
  17. snowflake/ml/_internal/utils/identifier.py +14 -0
  18. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
  19. snowflake/ml/dataset/dataset.py +54 -32
  20. snowflake/ml/dataset/dataset_factory.py +3 -4
  21. snowflake/ml/feature_store/feature_store.py +440 -243
  22. snowflake/ml/feature_store/feature_view.py +61 -9
  23. snowflake/ml/fileset/embedded_stage_fs.py +25 -21
  24. snowflake/ml/fileset/fileset.py +2 -2
  25. snowflake/ml/fileset/snowfs.py +4 -15
  26. snowflake/ml/fileset/stage_fs.py +6 -8
  27. snowflake/ml/lineage/__init__.py +3 -0
  28. snowflake/ml/lineage/lineage_node.py +139 -0
  29. snowflake/ml/model/_client/model/model_impl.py +47 -14
  30. snowflake/ml/model/_client/model/model_version_impl.py +82 -2
  31. snowflake/ml/model/_client/ops/model_ops.py +77 -5
  32. snowflake/ml/model/_client/sql/model.py +1 -0
  33. snowflake/ml/model/_client/sql/model_version.py +47 -4
  34. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
  35. snowflake/ml/model/_model_composer/model_composer.py +7 -6
  36. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +7 -1
  37. snowflake/ml/model/_model_composer/model_method/function_generator.py +17 -1
  38. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +79 -0
  39. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +5 -3
  40. snowflake/ml/model/_model_composer/model_method/model_method.py +5 -5
  41. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  42. snowflake/ml/model/_packager/model_handlers/_utils.py +1 -0
  43. snowflake/ml/model/_packager/model_handlers/catboost.py +2 -2
  44. snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
  45. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
  46. snowflake/ml/model/_packager/model_handlers/lightgbm.py +2 -2
  47. snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
  48. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
  49. snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
  50. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
  51. snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
  52. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
  53. snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
  54. snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
  55. snowflake/ml/model/_packager/model_handlers/xgboost.py +2 -2
  56. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  57. snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
  58. snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
  59. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  60. snowflake/ml/model/_packager/model_packager.py +9 -4
  61. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  62. snowflake/ml/model/_signatures/builtins_handler.py +2 -1
  63. snowflake/ml/model/_signatures/core.py +13 -1
  64. snowflake/ml/model/_signatures/pandas_handler.py +2 -0
  65. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  66. snowflake/ml/model/custom_model.py +22 -2
  67. snowflake/ml/model/model_signature.py +2 -0
  68. snowflake/ml/model/type_hints.py +74 -4
  69. snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
  70. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +158 -121
  71. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +2 -0
  72. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +39 -18
  73. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +88 -134
  74. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +22 -17
  75. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
  76. snowflake/ml/modeling/cluster/affinity_propagation.py +5 -3
  77. snowflake/ml/modeling/cluster/agglomerative_clustering.py +5 -3
  78. snowflake/ml/modeling/cluster/birch.py +5 -3
  79. snowflake/ml/modeling/cluster/bisecting_k_means.py +5 -3
  80. snowflake/ml/modeling/cluster/dbscan.py +5 -3
  81. snowflake/ml/modeling/cluster/feature_agglomeration.py +5 -3
  82. snowflake/ml/modeling/cluster/k_means.py +5 -3
  83. snowflake/ml/modeling/cluster/mean_shift.py +5 -3
  84. snowflake/ml/modeling/cluster/mini_batch_k_means.py +5 -3
  85. snowflake/ml/modeling/cluster/optics.py +5 -3
  86. snowflake/ml/modeling/cluster/spectral_biclustering.py +5 -3
  87. snowflake/ml/modeling/cluster/spectral_clustering.py +5 -3
  88. snowflake/ml/modeling/cluster/spectral_coclustering.py +5 -3
  89. snowflake/ml/modeling/compose/column_transformer.py +5 -3
  90. snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
  91. snowflake/ml/modeling/covariance/elliptic_envelope.py +5 -3
  92. snowflake/ml/modeling/covariance/empirical_covariance.py +5 -3
  93. snowflake/ml/modeling/covariance/graphical_lasso.py +5 -3
  94. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +5 -3
  95. snowflake/ml/modeling/covariance/ledoit_wolf.py +5 -3
  96. snowflake/ml/modeling/covariance/min_cov_det.py +5 -3
  97. snowflake/ml/modeling/covariance/oas.py +5 -3
  98. snowflake/ml/modeling/covariance/shrunk_covariance.py +5 -3
  99. snowflake/ml/modeling/decomposition/dictionary_learning.py +5 -3
  100. snowflake/ml/modeling/decomposition/factor_analysis.py +5 -3
  101. snowflake/ml/modeling/decomposition/fast_ica.py +5 -3
  102. snowflake/ml/modeling/decomposition/incremental_pca.py +5 -3
  103. snowflake/ml/modeling/decomposition/kernel_pca.py +5 -3
  104. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -3
  105. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -3
  106. snowflake/ml/modeling/decomposition/pca.py +5 -3
  107. snowflake/ml/modeling/decomposition/sparse_pca.py +5 -3
  108. snowflake/ml/modeling/decomposition/truncated_svd.py +5 -3
  109. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
  110. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
  111. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
  112. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
  113. snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
  114. snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
  115. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
  116. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
  117. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
  118. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
  119. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
  120. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
  121. snowflake/ml/modeling/ensemble/isolation_forest.py +5 -3
  122. snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
  123. snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
  124. snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
  125. snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
  126. snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
  127. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
  128. snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
  129. snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
  130. snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
  131. snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
  132. snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
  133. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
  134. snowflake/ml/modeling/feature_selection/variance_threshold.py +5 -3
  135. snowflake/ml/modeling/framework/base.py +3 -8
  136. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
  137. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
  138. snowflake/ml/modeling/impute/iterative_imputer.py +5 -3
  139. snowflake/ml/modeling/impute/knn_imputer.py +5 -3
  140. snowflake/ml/modeling/impute/missing_indicator.py +5 -3
  141. snowflake/ml/modeling/impute/simple_imputer.py +8 -4
  142. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +5 -3
  143. snowflake/ml/modeling/kernel_approximation/nystroem.py +5 -3
  144. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +5 -3
  145. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +5 -3
  146. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +5 -3
  147. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
  148. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
  149. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
  150. snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
  151. snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
  152. snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
  153. snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
  154. snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
  155. snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
  156. snowflake/ml/modeling/linear_model/lars.py +1 -1
  157. snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
  158. snowflake/ml/modeling/linear_model/lasso.py +1 -1
  159. snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
  160. snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
  161. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
  162. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
  163. snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
  164. snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
  165. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
  166. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
  167. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
  168. snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
  169. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
  170. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
  171. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
  172. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
  173. snowflake/ml/modeling/linear_model/perceptron.py +1 -1
  174. snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
  175. snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
  176. snowflake/ml/modeling/linear_model/ridge.py +1 -1
  177. snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
  178. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
  179. snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
  180. snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
  181. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -3
  182. snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
  183. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
  184. snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
  185. snowflake/ml/modeling/manifold/isomap.py +5 -3
  186. snowflake/ml/modeling/manifold/mds.py +5 -3
  187. snowflake/ml/modeling/manifold/spectral_embedding.py +5 -3
  188. snowflake/ml/modeling/manifold/tsne.py +5 -3
  189. snowflake/ml/modeling/metrics/ranking.py +3 -0
  190. snowflake/ml/modeling/metrics/regression.py +3 -0
  191. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +5 -3
  192. snowflake/ml/modeling/mixture/gaussian_mixture.py +5 -3
  193. snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
  194. snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
  195. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
  196. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
  197. snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
  198. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
  199. snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
  200. snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
  201. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
  202. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
  203. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
  204. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
  205. snowflake/ml/modeling/neighbors/kernel_density.py +5 -3
  206. snowflake/ml/modeling/neighbors/local_outlier_factor.py +5 -3
  207. snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
  208. snowflake/ml/modeling/neighbors/nearest_neighbors.py +5 -3
  209. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
  210. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
  211. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
  212. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +5 -3
  213. snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
  214. snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
  215. snowflake/ml/modeling/pipeline/pipeline.py +6 -0
  216. snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
  217. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
  218. snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
  219. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
  220. snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
  221. snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
  222. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +53 -11
  223. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +44 -13
  224. snowflake/ml/modeling/preprocessing/polynomial_features.py +5 -3
  225. snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
  226. snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
  227. snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
  228. snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
  229. snowflake/ml/modeling/svm/linear_svc.py +1 -1
  230. snowflake/ml/modeling/svm/linear_svr.py +1 -1
  231. snowflake/ml/modeling/svm/nu_svc.py +1 -1
  232. snowflake/ml/modeling/svm/nu_svr.py +1 -1
  233. snowflake/ml/modeling/svm/svc.py +1 -1
  234. snowflake/ml/modeling/svm/svr.py +1 -1
  235. snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
  236. snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
  237. snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
  238. snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
  239. snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
  240. snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
  241. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
  242. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
  243. snowflake/ml/registry/_manager/model_manager.py +16 -3
  244. snowflake/ml/version.py +1 -1
  245. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/METADATA +51 -7
  246. snowflake_ml_python-1.5.4.dist-info/RECORD +389 -0
  247. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/WHEEL +1 -1
  248. snowflake_ml_python-1.5.2.dist-info/RECORD +0 -384
  249. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/LICENSE.txt +0 -0
  250. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.4.dist-info}/top_level.txt +0 -0
@@ -76,8 +76,10 @@ class ShrunkCovariance(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -237,7 +239,7 @@ class ShrunkCovariance(BaseTransformer):
237
239
  inspect.currentframe(), ShrunkCovariance.__class__.__name__
238
240
  ),
239
241
  api_calls=[Session.call],
240
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
242
+ custom_tags={"autogen": True} if self._autogenerated else None,
241
243
  )
242
244
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
243
245
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class DictionaryLearning(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -343,7 +345,7 @@ class DictionaryLearning(BaseTransformer):
343
345
  inspect.currentframe(), DictionaryLearning.__class__.__name__
344
346
  ),
345
347
  api_calls=[Session.call],
346
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
348
+ custom_tags={"autogen": True} if self._autogenerated else None,
347
349
  )
348
350
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
349
351
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class FactorAnalysis(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -280,7 +282,7 @@ class FactorAnalysis(BaseTransformer):
280
282
  inspect.currentframe(), FactorAnalysis.__class__.__name__
281
283
  ),
282
284
  api_calls=[Session.call],
283
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
285
+ custom_tags={"autogen": True} if self._autogenerated else None,
284
286
  )
285
287
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
286
288
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class FastICA(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -298,7 +300,7 @@ class FastICA(BaseTransformer):
298
300
  inspect.currentframe(), FastICA.__class__.__name__
299
301
  ),
300
302
  api_calls=[Session.call],
301
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
303
+ custom_tags={"autogen": True} if self._autogenerated else None,
302
304
  )
303
305
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
304
306
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class IncrementalPCA(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -250,7 +252,7 @@ class IncrementalPCA(BaseTransformer):
250
252
  inspect.currentframe(), IncrementalPCA.__class__.__name__
251
253
  ),
252
254
  api_calls=[Session.call],
253
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
255
+ custom_tags={"autogen": True} if self._autogenerated else None,
254
256
  )
255
257
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
256
258
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class KernelPCA(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -346,7 +348,7 @@ class KernelPCA(BaseTransformer):
346
348
  inspect.currentframe(), KernelPCA.__class__.__name__
347
349
  ),
348
350
  api_calls=[Session.call],
349
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
351
+ custom_tags={"autogen": True} if self._autogenerated else None,
350
352
  )
351
353
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
352
354
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class MiniBatchDictionaryLearning(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -368,7 +370,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
368
370
  inspect.currentframe(), MiniBatchDictionaryLearning.__class__.__name__
369
371
  ),
370
372
  api_calls=[Session.call],
371
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
373
+ custom_tags={"autogen": True} if self._autogenerated else None,
372
374
  )
373
375
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
374
376
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class MiniBatchSparsePCA(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -313,7 +315,7 @@ class MiniBatchSparsePCA(BaseTransformer):
313
315
  inspect.currentframe(), MiniBatchSparsePCA.__class__.__name__
314
316
  ),
315
317
  api_calls=[Session.call],
316
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
318
+ custom_tags={"autogen": True} if self._autogenerated else None,
317
319
  )
318
320
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
319
321
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class PCA(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -315,7 +317,7 @@ class PCA(BaseTransformer):
315
317
  inspect.currentframe(), PCA.__class__.__name__
316
318
  ),
317
319
  api_calls=[Session.call],
318
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
320
+ custom_tags={"autogen": True} if self._autogenerated else None,
319
321
  )
320
322
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
321
323
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class SparsePCA(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -288,7 +290,7 @@ class SparsePCA(BaseTransformer):
288
290
  inspect.currentframe(), SparsePCA.__class__.__name__
289
291
  ),
290
292
  api_calls=[Session.call],
291
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
293
+ custom_tags={"autogen": True} if self._autogenerated else None,
292
294
  )
293
295
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
294
296
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class TruncatedSVD(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -269,7 +271,7 @@ class TruncatedSVD(BaseTransformer):
269
271
  inspect.currentframe(), TruncatedSVD.__class__.__name__
270
272
  ),
271
273
  api_calls=[Session.call],
272
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
274
+ custom_tags={"autogen": True} if self._autogenerated else None,
273
275
  )
274
276
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
275
277
  pd_df.columns = dataset.columns
@@ -286,7 +286,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
286
286
  inspect.currentframe(), LinearDiscriminantAnalysis.__class__.__name__
287
287
  ),
288
288
  api_calls=[Session.call],
289
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
289
+ custom_tags={"autogen": True} if self._autogenerated else None,
290
290
  )
291
291
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
292
292
  pd_df.columns = dataset.columns
@@ -248,7 +248,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
248
248
  inspect.currentframe(), QuadraticDiscriminantAnalysis.__class__.__name__
249
249
  ),
250
250
  api_calls=[Session.call],
251
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
251
+ custom_tags={"autogen": True} if self._autogenerated else None,
252
252
  )
253
253
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
254
254
  pd_df.columns = dataset.columns
@@ -273,7 +273,7 @@ class AdaBoostClassifier(BaseTransformer):
273
273
  inspect.currentframe(), AdaBoostClassifier.__class__.__name__
274
274
  ),
275
275
  api_calls=[Session.call],
276
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
276
+ custom_tags={"autogen": True} if self._autogenerated else None,
277
277
  )
278
278
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
279
279
  pd_df.columns = dataset.columns
@@ -270,7 +270,7 @@ class AdaBoostRegressor(BaseTransformer):
270
270
  inspect.currentframe(), AdaBoostRegressor.__class__.__name__
271
271
  ),
272
272
  api_calls=[Session.call],
273
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
273
+ custom_tags={"autogen": True} if self._autogenerated else None,
274
274
  )
275
275
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
276
276
  pd_df.columns = dataset.columns
@@ -305,7 +305,7 @@ class BaggingClassifier(BaseTransformer):
305
305
  inspect.currentframe(), BaggingClassifier.__class__.__name__
306
306
  ),
307
307
  api_calls=[Session.call],
308
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
308
+ custom_tags={"autogen": True} if self._autogenerated else None,
309
309
  )
310
310
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
311
311
  pd_df.columns = dataset.columns
@@ -305,7 +305,7 @@ class BaggingRegressor(BaseTransformer):
305
305
  inspect.currentframe(), BaggingRegressor.__class__.__name__
306
306
  ),
307
307
  api_calls=[Session.call],
308
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
308
+ custom_tags={"autogen": True} if self._autogenerated else None,
309
309
  )
310
310
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
311
311
  pd_df.columns = dataset.columns
@@ -408,7 +408,7 @@ class ExtraTreesClassifier(BaseTransformer):
408
408
  inspect.currentframe(), ExtraTreesClassifier.__class__.__name__
409
409
  ),
410
410
  api_calls=[Session.call],
411
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
411
+ custom_tags={"autogen": True} if self._autogenerated else None,
412
412
  )
413
413
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
414
414
  pd_df.columns = dataset.columns
@@ -387,7 +387,7 @@ class ExtraTreesRegressor(BaseTransformer):
387
387
  inspect.currentframe(), ExtraTreesRegressor.__class__.__name__
388
388
  ),
389
389
  api_calls=[Session.call],
390
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
390
+ custom_tags={"autogen": True} if self._autogenerated else None,
391
391
  )
392
392
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
393
393
  pd_df.columns = dataset.columns
@@ -420,7 +420,7 @@ class GradientBoostingClassifier(BaseTransformer):
420
420
  inspect.currentframe(), GradientBoostingClassifier.__class__.__name__
421
421
  ),
422
422
  api_calls=[Session.call],
423
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
423
+ custom_tags={"autogen": True} if self._autogenerated else None,
424
424
  )
425
425
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
426
426
  pd_df.columns = dataset.columns
@@ -429,7 +429,7 @@ class GradientBoostingRegressor(BaseTransformer):
429
429
  inspect.currentframe(), GradientBoostingRegressor.__class__.__name__
430
430
  ),
431
431
  api_calls=[Session.call],
432
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
432
+ custom_tags={"autogen": True} if self._autogenerated else None,
433
433
  )
434
434
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
435
435
  pd_df.columns = dataset.columns
@@ -401,7 +401,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
401
401
  inspect.currentframe(), HistGradientBoostingClassifier.__class__.__name__
402
402
  ),
403
403
  api_calls=[Session.call],
404
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
404
+ custom_tags={"autogen": True} if self._autogenerated else None,
405
405
  )
406
406
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
407
407
  pd_df.columns = dataset.columns
@@ -392,7 +392,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
392
392
  inspect.currentframe(), HistGradientBoostingRegressor.__class__.__name__
393
393
  ),
394
394
  api_calls=[Session.call],
395
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
395
+ custom_tags={"autogen": True} if self._autogenerated else None,
396
396
  )
397
397
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
398
398
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class IsolationForest(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -292,7 +294,7 @@ class IsolationForest(BaseTransformer):
292
294
  inspect.currentframe(), IsolationForest.__class__.__name__
293
295
  ),
294
296
  api_calls=[Session.call],
295
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
297
+ custom_tags={"autogen": True} if self._autogenerated else None,
296
298
  )
297
299
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
298
300
  pd_df.columns = dataset.columns
@@ -404,7 +404,7 @@ class RandomForestClassifier(BaseTransformer):
404
404
  inspect.currentframe(), RandomForestClassifier.__class__.__name__
405
405
  ),
406
406
  api_calls=[Session.call],
407
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
407
+ custom_tags={"autogen": True} if self._autogenerated else None,
408
408
  )
409
409
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
410
410
  pd_df.columns = dataset.columns
@@ -383,7 +383,7 @@ class RandomForestRegressor(BaseTransformer):
383
383
  inspect.currentframe(), RandomForestRegressor.__class__.__name__
384
384
  ),
385
385
  api_calls=[Session.call],
386
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
386
+ custom_tags={"autogen": True} if self._autogenerated else None,
387
387
  )
388
388
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
389
389
  pd_df.columns = dataset.columns
@@ -284,7 +284,7 @@ class StackingRegressor(BaseTransformer):
284
284
  inspect.currentframe(), StackingRegressor.__class__.__name__
285
285
  ),
286
286
  api_calls=[Session.call],
287
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
287
+ custom_tags={"autogen": True} if self._autogenerated else None,
288
288
  )
289
289
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
290
290
  pd_df.columns = dataset.columns
@@ -266,7 +266,7 @@ class VotingClassifier(BaseTransformer):
266
266
  inspect.currentframe(), VotingClassifier.__class__.__name__
267
267
  ),
268
268
  api_calls=[Session.call],
269
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
269
+ custom_tags={"autogen": True} if self._autogenerated else None,
270
270
  )
271
271
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
272
272
  pd_df.columns = dataset.columns
@@ -248,7 +248,7 @@ class VotingRegressor(BaseTransformer):
248
248
  inspect.currentframe(), VotingRegressor.__class__.__name__
249
249
  ),
250
250
  api_calls=[Session.call],
251
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
251
+ custom_tags={"autogen": True} if self._autogenerated else None,
252
252
  )
253
253
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
254
254
  pd_df.columns = dataset.columns
@@ -238,7 +238,7 @@ class GenericUnivariateSelect(BaseTransformer):
238
238
  inspect.currentframe(), GenericUnivariateSelect.__class__.__name__
239
239
  ),
240
240
  api_calls=[Session.call],
241
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
241
+ custom_tags={"autogen": True} if self._autogenerated else None,
242
242
  )
243
243
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
244
244
  pd_df.columns = dataset.columns
@@ -234,7 +234,7 @@ class SelectFdr(BaseTransformer):
234
234
  inspect.currentframe(), SelectFdr.__class__.__name__
235
235
  ),
236
236
  api_calls=[Session.call],
237
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
237
+ custom_tags={"autogen": True} if self._autogenerated else None,
238
238
  )
239
239
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
240
240
  pd_df.columns = dataset.columns
@@ -234,7 +234,7 @@ class SelectFpr(BaseTransformer):
234
234
  inspect.currentframe(), SelectFpr.__class__.__name__
235
235
  ),
236
236
  api_calls=[Session.call],
237
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
237
+ custom_tags={"autogen": True} if self._autogenerated else None,
238
238
  )
239
239
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
240
240
  pd_df.columns = dataset.columns
@@ -234,7 +234,7 @@ class SelectFwe(BaseTransformer):
234
234
  inspect.currentframe(), SelectFwe.__class__.__name__
235
235
  ),
236
236
  api_calls=[Session.call],
237
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
237
+ custom_tags={"autogen": True} if self._autogenerated else None,
238
238
  )
239
239
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
240
240
  pd_df.columns = dataset.columns
@@ -235,7 +235,7 @@ class SelectKBest(BaseTransformer):
235
235
  inspect.currentframe(), SelectKBest.__class__.__name__
236
236
  ),
237
237
  api_calls=[Session.call],
238
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
238
+ custom_tags={"autogen": True} if self._autogenerated else None,
239
239
  )
240
240
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
241
241
  pd_df.columns = dataset.columns
@@ -234,7 +234,7 @@ class SelectPercentile(BaseTransformer):
234
234
  inspect.currentframe(), SelectPercentile.__class__.__name__
235
235
  ),
236
236
  api_calls=[Session.call],
237
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
237
+ custom_tags={"autogen": True} if self._autogenerated else None,
238
238
  )
239
239
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
240
240
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class SequentialFeatureSelector(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -292,7 +294,7 @@ class SequentialFeatureSelector(BaseTransformer):
292
294
  inspect.currentframe(), SequentialFeatureSelector.__class__.__name__
293
295
  ),
294
296
  api_calls=[Session.call],
295
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
297
+ custom_tags={"autogen": True} if self._autogenerated else None,
296
298
  )
297
299
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
298
300
  pd_df.columns = dataset.columns
@@ -76,8 +76,10 @@ class VarianceThreshold(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -225,7 +227,7 @@ class VarianceThreshold(BaseTransformer):
225
227
  inspect.currentframe(), VarianceThreshold.__class__.__name__
226
228
  ),
227
229
  api_calls=[Session.call],
228
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
230
+ custom_tags={"autogen": True} if self._autogenerated else None,
229
231
  )
230
232
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
231
233
  pd_df.columns = dataset.columns
@@ -16,7 +16,7 @@ from snowflake.ml._internal.exceptions import (
16
16
  exceptions,
17
17
  modeling_error_messages,
18
18
  )
19
- from snowflake.ml._internal.lineage import data_source, lineage_utils
19
+ from snowflake.ml._internal.lineage import lineage_utils
20
20
  from snowflake.ml._internal.utils import identifier, parallelize
21
21
  from snowflake.ml.modeling.framework import _utils
22
22
  from snowflake.snowpark import functions as F
@@ -386,7 +386,6 @@ class BaseEstimator(Base):
386
386
  self.file_names = file_names
387
387
  self.custom_states = custom_states
388
388
  self.sample_weight_col = sample_weight_col
389
- self._data_sources: Optional[List[data_source.DataSource]] = None
390
389
 
391
390
  self.start_time = datetime.now().strftime(_utils.DATETIME_FORMAT)[:-3]
392
391
 
@@ -421,18 +420,14 @@ class BaseEstimator(Base):
421
420
  """
422
421
  return []
423
422
 
424
- def _get_data_sources(self) -> Optional[List[data_source.DataSource]]:
425
- return self._data_sources
426
-
427
423
  @telemetry.send_api_usage_telemetry(
428
424
  project=PROJECT,
429
425
  subproject=SUBPROJECT,
430
426
  )
431
427
  def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "BaseEstimator":
432
428
  """Runs universal logics for all fit implementations."""
433
- self._data_sources = getattr(dataset, lineage_utils.DATA_SOURCES_ATTR, None)
434
- if self._data_sources:
435
- assert all(isinstance(ds, data_source.DataSource) for ds in self._data_sources)
429
+ data_sources = lineage_utils.get_data_sources(dataset)
430
+ lineage_utils.set_data_sources(self, data_sources)
436
431
  return self._fit(dataset)
437
432
 
438
433
  @abstractmethod
@@ -320,7 +320,7 @@ class GaussianProcessClassifier(BaseTransformer):
320
320
  inspect.currentframe(), GaussianProcessClassifier.__class__.__name__
321
321
  ),
322
322
  api_calls=[Session.call],
323
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
323
+ custom_tags={"autogen": True} if self._autogenerated else None,
324
324
  )
325
325
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
326
326
  pd_df.columns = dataset.columns