snowflake-ml-python 1.5.2__py3-none-any.whl → 1.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. snowflake/cortex/_complete.py +26 -5
  2. snowflake/cortex/_sse_client.py +81 -0
  3. snowflake/cortex/_util.py +105 -8
  4. snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
  5. snowflake/ml/dataset/dataset.py +15 -12
  6. snowflake/ml/dataset/dataset_factory.py +3 -4
  7. snowflake/ml/feature_store/feature_store.py +2 -2
  8. snowflake/ml/model/_client/sql/model_version.py +2 -2
  9. snowflake/ml/model/_model_composer/model_composer.py +2 -2
  10. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -1
  11. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  12. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  13. snowflake/ml/model/_signatures/builtins_handler.py +2 -1
  14. snowflake/ml/model/_signatures/core.py +13 -1
  15. snowflake/ml/model/_signatures/pandas_handler.py +2 -0
  16. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  17. snowflake/ml/model/model_signature.py +2 -0
  18. snowflake/ml/model/type_hints.py +1 -0
  19. snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
  20. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +156 -121
  21. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +2 -0
  22. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +38 -18
  23. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +82 -134
  24. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +21 -17
  25. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
  26. snowflake/ml/modeling/cluster/affinity_propagation.py +1 -1
  27. snowflake/ml/modeling/cluster/agglomerative_clustering.py +1 -1
  28. snowflake/ml/modeling/cluster/birch.py +1 -1
  29. snowflake/ml/modeling/cluster/bisecting_k_means.py +1 -1
  30. snowflake/ml/modeling/cluster/dbscan.py +1 -1
  31. snowflake/ml/modeling/cluster/feature_agglomeration.py +1 -1
  32. snowflake/ml/modeling/cluster/k_means.py +1 -1
  33. snowflake/ml/modeling/cluster/mean_shift.py +1 -1
  34. snowflake/ml/modeling/cluster/mini_batch_k_means.py +1 -1
  35. snowflake/ml/modeling/cluster/optics.py +1 -1
  36. snowflake/ml/modeling/cluster/spectral_biclustering.py +1 -1
  37. snowflake/ml/modeling/cluster/spectral_clustering.py +1 -1
  38. snowflake/ml/modeling/cluster/spectral_coclustering.py +1 -1
  39. snowflake/ml/modeling/compose/column_transformer.py +1 -1
  40. snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
  41. snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
  42. snowflake/ml/modeling/covariance/empirical_covariance.py +1 -1
  43. snowflake/ml/modeling/covariance/graphical_lasso.py +1 -1
  44. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
  45. snowflake/ml/modeling/covariance/ledoit_wolf.py +1 -1
  46. snowflake/ml/modeling/covariance/min_cov_det.py +1 -1
  47. snowflake/ml/modeling/covariance/oas.py +1 -1
  48. snowflake/ml/modeling/covariance/shrunk_covariance.py +1 -1
  49. snowflake/ml/modeling/decomposition/dictionary_learning.py +1 -1
  50. snowflake/ml/modeling/decomposition/factor_analysis.py +1 -1
  51. snowflake/ml/modeling/decomposition/fast_ica.py +1 -1
  52. snowflake/ml/modeling/decomposition/incremental_pca.py +1 -1
  53. snowflake/ml/modeling/decomposition/kernel_pca.py +1 -1
  54. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +1 -1
  55. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +1 -1
  56. snowflake/ml/modeling/decomposition/pca.py +1 -1
  57. snowflake/ml/modeling/decomposition/sparse_pca.py +1 -1
  58. snowflake/ml/modeling/decomposition/truncated_svd.py +1 -1
  59. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
  60. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
  61. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
  62. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
  63. snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
  64. snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
  65. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
  66. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
  67. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
  68. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
  69. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
  70. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
  71. snowflake/ml/modeling/ensemble/isolation_forest.py +1 -1
  72. snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
  73. snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
  74. snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
  75. snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
  76. snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
  77. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
  78. snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
  79. snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
  80. snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
  81. snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
  82. snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
  83. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +1 -1
  84. snowflake/ml/modeling/feature_selection/variance_threshold.py +1 -1
  85. snowflake/ml/modeling/framework/base.py +3 -8
  86. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
  87. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
  88. snowflake/ml/modeling/impute/iterative_imputer.py +1 -1
  89. snowflake/ml/modeling/impute/knn_imputer.py +1 -1
  90. snowflake/ml/modeling/impute/missing_indicator.py +1 -1
  91. snowflake/ml/modeling/impute/simple_imputer.py +8 -4
  92. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +1 -1
  93. snowflake/ml/modeling/kernel_approximation/nystroem.py +1 -1
  94. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +1 -1
  95. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +1 -1
  96. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +1 -1
  97. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
  98. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
  99. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
  100. snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
  101. snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
  102. snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
  103. snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
  104. snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
  105. snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
  106. snowflake/ml/modeling/linear_model/lars.py +1 -1
  107. snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
  108. snowflake/ml/modeling/linear_model/lasso.py +1 -1
  109. snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
  110. snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
  111. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
  112. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
  113. snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
  114. snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
  115. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
  116. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
  117. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
  118. snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
  119. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
  120. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
  121. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
  122. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
  123. snowflake/ml/modeling/linear_model/perceptron.py +1 -1
  124. snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
  125. snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
  126. snowflake/ml/modeling/linear_model/ridge.py +1 -1
  127. snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
  128. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
  129. snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
  130. snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
  131. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +1 -1
  132. snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
  133. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
  134. snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
  135. snowflake/ml/modeling/manifold/isomap.py +1 -1
  136. snowflake/ml/modeling/manifold/mds.py +1 -1
  137. snowflake/ml/modeling/manifold/spectral_embedding.py +1 -1
  138. snowflake/ml/modeling/manifold/tsne.py +1 -1
  139. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +1 -1
  140. snowflake/ml/modeling/mixture/gaussian_mixture.py +1 -1
  141. snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
  142. snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
  143. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
  144. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
  145. snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
  146. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
  147. snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
  148. snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
  149. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
  150. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
  151. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
  152. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
  153. snowflake/ml/modeling/neighbors/kernel_density.py +1 -1
  154. snowflake/ml/modeling/neighbors/local_outlier_factor.py +1 -1
  155. snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
  156. snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
  157. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
  158. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
  159. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
  160. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +1 -1
  161. snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
  162. snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
  163. snowflake/ml/modeling/pipeline/pipeline.py +5 -0
  164. snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
  165. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
  166. snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
  167. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
  168. snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
  169. snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
  170. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +10 -2
  171. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +8 -5
  172. snowflake/ml/modeling/preprocessing/polynomial_features.py +1 -1
  173. snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
  174. snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
  177. snowflake/ml/modeling/svm/linear_svc.py +1 -1
  178. snowflake/ml/modeling/svm/linear_svr.py +1 -1
  179. snowflake/ml/modeling/svm/nu_svc.py +1 -1
  180. snowflake/ml/modeling/svm/nu_svr.py +1 -1
  181. snowflake/ml/modeling/svm/svc.py +1 -1
  182. snowflake/ml/modeling/svm/svr.py +1 -1
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
  191. snowflake/ml/version.py +1 -1
  192. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/METADATA +21 -5
  193. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/RECORD +196 -195
  194. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/LICENSE.txt +0 -0
  195. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/WHEEL +0 -0
  196. {snowflake_ml_python-1.5.2.dist-info → snowflake_ml_python-1.5.3.dist-info}/top_level.txt +0 -0
@@ -284,7 +284,7 @@ class StackingRegressor(BaseTransformer):
284
284
  inspect.currentframe(), StackingRegressor.__class__.__name__
285
285
  ),
286
286
  api_calls=[Session.call],
287
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
287
+ custom_tags={"autogen": True} if self._autogenerated else None,
288
288
  )
289
289
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
290
290
  pd_df.columns = dataset.columns
@@ -266,7 +266,7 @@ class VotingClassifier(BaseTransformer):
266
266
  inspect.currentframe(), VotingClassifier.__class__.__name__
267
267
  ),
268
268
  api_calls=[Session.call],
269
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
269
+ custom_tags={"autogen": True} if self._autogenerated else None,
270
270
  )
271
271
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
272
272
  pd_df.columns = dataset.columns
@@ -248,7 +248,7 @@ class VotingRegressor(BaseTransformer):
248
248
  inspect.currentframe(), VotingRegressor.__class__.__name__
249
249
  ),
250
250
  api_calls=[Session.call],
251
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
251
+ custom_tags={"autogen": True} if self._autogenerated else None,
252
252
  )
253
253
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
254
254
  pd_df.columns = dataset.columns
@@ -238,7 +238,7 @@ class GenericUnivariateSelect(BaseTransformer):
238
238
  inspect.currentframe(), GenericUnivariateSelect.__class__.__name__
239
239
  ),
240
240
  api_calls=[Session.call],
241
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
241
+ custom_tags={"autogen": True} if self._autogenerated else None,
242
242
  )
243
243
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
244
244
  pd_df.columns = dataset.columns
@@ -234,7 +234,7 @@ class SelectFdr(BaseTransformer):
234
234
  inspect.currentframe(), SelectFdr.__class__.__name__
235
235
  ),
236
236
  api_calls=[Session.call],
237
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
237
+ custom_tags={"autogen": True} if self._autogenerated else None,
238
238
  )
239
239
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
240
240
  pd_df.columns = dataset.columns
@@ -234,7 +234,7 @@ class SelectFpr(BaseTransformer):
234
234
  inspect.currentframe(), SelectFpr.__class__.__name__
235
235
  ),
236
236
  api_calls=[Session.call],
237
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
237
+ custom_tags={"autogen": True} if self._autogenerated else None,
238
238
  )
239
239
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
240
240
  pd_df.columns = dataset.columns
@@ -234,7 +234,7 @@ class SelectFwe(BaseTransformer):
234
234
  inspect.currentframe(), SelectFwe.__class__.__name__
235
235
  ),
236
236
  api_calls=[Session.call],
237
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
237
+ custom_tags={"autogen": True} if self._autogenerated else None,
238
238
  )
239
239
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
240
240
  pd_df.columns = dataset.columns
@@ -235,7 +235,7 @@ class SelectKBest(BaseTransformer):
235
235
  inspect.currentframe(), SelectKBest.__class__.__name__
236
236
  ),
237
237
  api_calls=[Session.call],
238
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
238
+ custom_tags={"autogen": True} if self._autogenerated else None,
239
239
  )
240
240
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
241
241
  pd_df.columns = dataset.columns
@@ -234,7 +234,7 @@ class SelectPercentile(BaseTransformer):
234
234
  inspect.currentframe(), SelectPercentile.__class__.__name__
235
235
  ),
236
236
  api_calls=[Session.call],
237
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
237
+ custom_tags={"autogen": True} if self._autogenerated else None,
238
238
  )
239
239
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
240
240
  pd_df.columns = dataset.columns
@@ -292,7 +292,7 @@ class SequentialFeatureSelector(BaseTransformer):
292
292
  inspect.currentframe(), SequentialFeatureSelector.__class__.__name__
293
293
  ),
294
294
  api_calls=[Session.call],
295
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
295
+ custom_tags={"autogen": True} if self._autogenerated else None,
296
296
  )
297
297
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
298
298
  pd_df.columns = dataset.columns
@@ -225,7 +225,7 @@ class VarianceThreshold(BaseTransformer):
225
225
  inspect.currentframe(), VarianceThreshold.__class__.__name__
226
226
  ),
227
227
  api_calls=[Session.call],
228
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
228
+ custom_tags={"autogen": True} if self._autogenerated else None,
229
229
  )
230
230
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
231
231
  pd_df.columns = dataset.columns
@@ -16,7 +16,7 @@ from snowflake.ml._internal.exceptions import (
16
16
  exceptions,
17
17
  modeling_error_messages,
18
18
  )
19
- from snowflake.ml._internal.lineage import data_source, lineage_utils
19
+ from snowflake.ml._internal.lineage import lineage_utils
20
20
  from snowflake.ml._internal.utils import identifier, parallelize
21
21
  from snowflake.ml.modeling.framework import _utils
22
22
  from snowflake.snowpark import functions as F
@@ -386,7 +386,6 @@ class BaseEstimator(Base):
386
386
  self.file_names = file_names
387
387
  self.custom_states = custom_states
388
388
  self.sample_weight_col = sample_weight_col
389
- self._data_sources: Optional[List[data_source.DataSource]] = None
390
389
 
391
390
  self.start_time = datetime.now().strftime(_utils.DATETIME_FORMAT)[:-3]
392
391
 
@@ -421,18 +420,14 @@ class BaseEstimator(Base):
421
420
  """
422
421
  return []
423
422
 
424
- def _get_data_sources(self) -> Optional[List[data_source.DataSource]]:
425
- return self._data_sources
426
-
427
423
  @telemetry.send_api_usage_telemetry(
428
424
  project=PROJECT,
429
425
  subproject=SUBPROJECT,
430
426
  )
431
427
  def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "BaseEstimator":
432
428
  """Runs universal logics for all fit implementations."""
433
- self._data_sources = getattr(dataset, lineage_utils.DATA_SOURCES_ATTR, None)
434
- if self._data_sources:
435
- assert all(isinstance(ds, data_source.DataSource) for ds in self._data_sources)
429
+ data_sources = lineage_utils.get_data_sources(dataset)
430
+ lineage_utils.set_data_sources(self, data_sources)
436
431
  return self._fit(dataset)
437
432
 
438
433
  @abstractmethod
@@ -320,7 +320,7 @@ class GaussianProcessClassifier(BaseTransformer):
320
320
  inspect.currentframe(), GaussianProcessClassifier.__class__.__name__
321
321
  ),
322
322
  api_calls=[Session.call],
323
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
323
+ custom_tags={"autogen": True} if self._autogenerated else None,
324
324
  )
325
325
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
326
326
  pd_df.columns = dataset.columns
@@ -311,7 +311,7 @@ class GaussianProcessRegressor(BaseTransformer):
311
311
  inspect.currentframe(), GaussianProcessRegressor.__class__.__name__
312
312
  ),
313
313
  api_calls=[Session.call],
314
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
314
+ custom_tags={"autogen": True} if self._autogenerated else None,
315
315
  )
316
316
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
317
317
  pd_df.columns = dataset.columns
@@ -353,7 +353,7 @@ class IterativeImputer(BaseTransformer):
353
353
  inspect.currentframe(), IterativeImputer.__class__.__name__
354
354
  ),
355
355
  api_calls=[Session.call],
356
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
356
+ custom_tags={"autogen": True} if self._autogenerated else None,
357
357
  )
358
358
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
359
359
  pd_df.columns = dataset.columns
@@ -279,7 +279,7 @@ class KNNImputer(BaseTransformer):
279
279
  inspect.currentframe(), KNNImputer.__class__.__name__
280
280
  ),
281
281
  api_calls=[Session.call],
282
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
282
+ custom_tags={"autogen": True} if self._autogenerated else None,
283
283
  )
284
284
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
285
285
  pd_df.columns = dataset.columns
@@ -253,7 +253,7 @@ class MissingIndicator(BaseTransformer):
253
253
  inspect.currentframe(), MissingIndicator.__class__.__name__
254
254
  ),
255
255
  api_calls=[Session.call],
256
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
256
+ custom_tags={"autogen": True} if self._autogenerated else None,
257
257
  )
258
258
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
259
259
  pd_df.columns = dataset.columns
@@ -102,10 +102,14 @@ class SimpleImputer(base.BaseTransformer):
102
102
  For string or object data types, `fill_value` must be a string. If `None`, `fill_value` will be 0 when
103
103
  imputing numerical data and `missing_value` for strings and object data types.
104
104
  input_cols: Optional[Union[str, List[str]]]
105
- Columns to use as inputs during fit and transform.
105
+ The name(s) of one or more columns in the input DataFrame containing feature(s) to be imputed. Input
106
+ columns must be specified before fit with this argument or after initialization with the
107
+ `set_input_cols` method. This argument is optional for API consistency.
106
108
  output_cols: Optional[Union[str, List[str]]]
107
- A string or list of strings representing column names that will store the output of transform operation.
108
- The length of `output_cols` must equal the length of `input_cols`.
109
+ The name(s) to assign output columns in the output DataFrame. The number of
110
+ output columns specified must equal the number of input columns. Output columns must be specified before
111
+ transform with this argument or after initialization with the `set_output_cols` method. This argument is
112
+ optional for API consistency.
109
113
  passthrough_cols: A string or a list of strings indicating column names to be excluded from any
110
114
  operations (such as train, transform, or inference). These specified column(s)
111
115
  will remain untouched throughout the process. This option is helpful in scenarios
@@ -230,7 +234,7 @@ class SimpleImputer(base.BaseTransformer):
230
234
 
231
235
  return input_col_datatypes
232
236
 
233
- def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "SimpleImputer":
237
+ def _fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "SimpleImputer":
234
238
  if isinstance(dataset, snowpark.DataFrame):
235
239
  return self._fit_snowpark(dataset)
236
240
  else:
@@ -228,7 +228,7 @@ class AdditiveChi2Sampler(BaseTransformer):
228
228
  inspect.currentframe(), AdditiveChi2Sampler.__class__.__name__
229
229
  ),
230
230
  api_calls=[Session.call],
231
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
231
+ custom_tags={"autogen": True} if self._autogenerated else None,
232
232
  )
233
233
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
234
234
  pd_df.columns = dataset.columns
@@ -276,7 +276,7 @@ class Nystroem(BaseTransformer):
276
276
  inspect.currentframe(), Nystroem.__class__.__name__
277
277
  ),
278
278
  api_calls=[Session.call],
279
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
279
+ custom_tags={"autogen": True} if self._autogenerated else None,
280
280
  )
281
281
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
282
282
  pd_df.columns = dataset.columns
@@ -252,7 +252,7 @@ class PolynomialCountSketch(BaseTransformer):
252
252
  inspect.currentframe(), PolynomialCountSketch.__class__.__name__
253
253
  ),
254
254
  api_calls=[Session.call],
255
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
255
+ custom_tags={"autogen": True} if self._autogenerated else None,
256
256
  )
257
257
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
258
258
  pd_df.columns = dataset.columns
@@ -239,7 +239,7 @@ class RBFSampler(BaseTransformer):
239
239
  inspect.currentframe(), RBFSampler.__class__.__name__
240
240
  ),
241
241
  api_calls=[Session.call],
242
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
242
+ custom_tags={"autogen": True} if self._autogenerated else None,
243
243
  )
244
244
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
245
245
  pd_df.columns = dataset.columns
@@ -237,7 +237,7 @@ class SkewedChi2Sampler(BaseTransformer):
237
237
  inspect.currentframe(), SkewedChi2Sampler.__class__.__name__
238
238
  ),
239
239
  api_calls=[Session.call],
240
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
240
+ custom_tags={"autogen": True} if self._autogenerated else None,
241
241
  )
242
242
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
243
243
  pd_df.columns = dataset.columns
@@ -273,7 +273,7 @@ class KernelRidge(BaseTransformer):
273
273
  inspect.currentframe(), KernelRidge.__class__.__name__
274
274
  ),
275
275
  api_calls=[Session.call],
276
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
276
+ custom_tags={"autogen": True} if self._autogenerated else None,
277
277
  )
278
278
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
279
279
  pd_df.columns = dataset.columns
@@ -262,7 +262,7 @@ class LGBMClassifier(BaseTransformer):
262
262
  inspect.currentframe(), LGBMClassifier.__class__.__name__
263
263
  ),
264
264
  api_calls=[Session.call],
265
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
265
+ custom_tags={"autogen": True} if self._autogenerated else None,
266
266
  )
267
267
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
268
268
  pd_df.columns = dataset.columns
@@ -262,7 +262,7 @@ class LGBMRegressor(BaseTransformer):
262
262
  inspect.currentframe(), LGBMRegressor.__class__.__name__
263
263
  ),
264
264
  api_calls=[Session.call],
265
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
265
+ custom_tags={"autogen": True} if self._autogenerated else None,
266
266
  )
267
267
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
268
268
  pd_df.columns = dataset.columns
@@ -287,7 +287,7 @@ class ARDRegression(BaseTransformer):
287
287
  inspect.currentframe(), ARDRegression.__class__.__name__
288
288
  ),
289
289
  api_calls=[Session.call],
290
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
290
+ custom_tags={"autogen": True} if self._autogenerated else None,
291
291
  )
292
292
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
293
293
  pd_df.columns = dataset.columns
@@ -298,7 +298,7 @@ class BayesianRidge(BaseTransformer):
298
298
  inspect.currentframe(), BayesianRidge.__class__.__name__
299
299
  ),
300
300
  api_calls=[Session.call],
301
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
301
+ custom_tags={"autogen": True} if self._autogenerated else None,
302
302
  )
303
303
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
304
304
  pd_df.columns = dataset.columns
@@ -297,7 +297,7 @@ class ElasticNet(BaseTransformer):
297
297
  inspect.currentframe(), ElasticNet.__class__.__name__
298
298
  ),
299
299
  api_calls=[Session.call],
300
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
300
+ custom_tags={"autogen": True} if self._autogenerated else None,
301
301
  )
302
302
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
303
303
  pd_df.columns = dataset.columns
@@ -333,7 +333,7 @@ class ElasticNetCV(BaseTransformer):
333
333
  inspect.currentframe(), ElasticNetCV.__class__.__name__
334
334
  ),
335
335
  api_calls=[Session.call],
336
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
336
+ custom_tags={"autogen": True} if self._autogenerated else None,
337
337
  )
338
338
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
339
339
  pd_df.columns = dataset.columns
@@ -278,7 +278,7 @@ class GammaRegressor(BaseTransformer):
278
278
  inspect.currentframe(), GammaRegressor.__class__.__name__
279
279
  ),
280
280
  api_calls=[Session.call],
281
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
281
+ custom_tags={"autogen": True} if self._autogenerated else None,
282
282
  )
283
283
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
284
284
  pd_df.columns = dataset.columns
@@ -261,7 +261,7 @@ class HuberRegressor(BaseTransformer):
261
261
  inspect.currentframe(), HuberRegressor.__class__.__name__
262
262
  ),
263
263
  api_calls=[Session.call],
264
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
264
+ custom_tags={"autogen": True} if self._autogenerated else None,
265
265
  )
266
266
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
267
267
  pd_df.columns = dataset.columns
@@ -290,7 +290,7 @@ class Lars(BaseTransformer):
290
290
  inspect.currentframe(), Lars.__class__.__name__
291
291
  ),
292
292
  api_calls=[Session.call],
293
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
293
+ custom_tags={"autogen": True} if self._autogenerated else None,
294
294
  )
295
295
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
296
296
  pd_df.columns = dataset.columns
@@ -298,7 +298,7 @@ class LarsCV(BaseTransformer):
298
298
  inspect.currentframe(), LarsCV.__class__.__name__
299
299
  ),
300
300
  api_calls=[Session.call],
301
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
301
+ custom_tags={"autogen": True} if self._autogenerated else None,
302
302
  )
303
303
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
304
304
  pd_df.columns = dataset.columns
@@ -291,7 +291,7 @@ class Lasso(BaseTransformer):
291
291
  inspect.currentframe(), Lasso.__class__.__name__
292
292
  ),
293
293
  api_calls=[Session.call],
294
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
294
+ custom_tags={"autogen": True} if self._autogenerated else None,
295
295
  )
296
296
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
297
297
  pd_df.columns = dataset.columns
@@ -319,7 +319,7 @@ class LassoCV(BaseTransformer):
319
319
  inspect.currentframe(), LassoCV.__class__.__name__
320
320
  ),
321
321
  api_calls=[Session.call],
322
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
322
+ custom_tags={"autogen": True} if self._autogenerated else None,
323
323
  )
324
324
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
325
325
  pd_df.columns = dataset.columns
@@ -311,7 +311,7 @@ class LassoLars(BaseTransformer):
311
311
  inspect.currentframe(), LassoLars.__class__.__name__
312
312
  ),
313
313
  api_calls=[Session.call],
314
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
314
+ custom_tags={"autogen": True} if self._autogenerated else None,
315
315
  )
316
316
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
317
317
  pd_df.columns = dataset.columns
@@ -312,7 +312,7 @@ class LassoLarsCV(BaseTransformer):
312
312
  inspect.currentframe(), LassoLarsCV.__class__.__name__
313
313
  ),
314
314
  api_calls=[Session.call],
315
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
315
+ custom_tags={"autogen": True} if self._autogenerated else None,
316
316
  )
317
317
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
318
318
  pd_df.columns = dataset.columns
@@ -295,7 +295,7 @@ class LassoLarsIC(BaseTransformer):
295
295
  inspect.currentframe(), LassoLarsIC.__class__.__name__
296
296
  ),
297
297
  api_calls=[Session.call],
298
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
298
+ custom_tags={"autogen": True} if self._autogenerated else None,
299
299
  )
300
300
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
301
301
  pd_df.columns = dataset.columns
@@ -248,7 +248,7 @@ class LinearRegression(BaseTransformer):
248
248
  inspect.currentframe(), LinearRegression.__class__.__name__
249
249
  ),
250
250
  api_calls=[Session.call],
251
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
251
+ custom_tags={"autogen": True} if self._autogenerated else None,
252
252
  )
253
253
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
254
254
  pd_df.columns = dataset.columns
@@ -362,7 +362,7 @@ class LogisticRegression(BaseTransformer):
362
362
  inspect.currentframe(), LogisticRegression.__class__.__name__
363
363
  ),
364
364
  api_calls=[Session.call],
365
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
365
+ custom_tags={"autogen": True} if self._autogenerated else None,
366
366
  )
367
367
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
368
368
  pd_df.columns = dataset.columns
@@ -383,7 +383,7 @@ class LogisticRegressionCV(BaseTransformer):
383
383
  inspect.currentframe(), LogisticRegressionCV.__class__.__name__
384
384
  ),
385
385
  api_calls=[Session.call],
386
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
386
+ custom_tags={"autogen": True} if self._autogenerated else None,
387
387
  )
388
388
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
389
389
  pd_df.columns = dataset.columns
@@ -281,7 +281,7 @@ class MultiTaskElasticNet(BaseTransformer):
281
281
  inspect.currentframe(), MultiTaskElasticNet.__class__.__name__
282
282
  ),
283
283
  api_calls=[Session.call],
284
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
284
+ custom_tags={"autogen": True} if self._autogenerated else None,
285
285
  )
286
286
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
287
287
  pd_df.columns = dataset.columns
@@ -322,7 +322,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
322
322
  inspect.currentframe(), MultiTaskElasticNetCV.__class__.__name__
323
323
  ),
324
324
  api_calls=[Session.call],
325
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
325
+ custom_tags={"autogen": True} if self._autogenerated else None,
326
326
  )
327
327
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
328
328
  pd_df.columns = dataset.columns
@@ -273,7 +273,7 @@ class MultiTaskLasso(BaseTransformer):
273
273
  inspect.currentframe(), MultiTaskLasso.__class__.__name__
274
274
  ),
275
275
  api_calls=[Session.call],
276
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
276
+ custom_tags={"autogen": True} if self._autogenerated else None,
277
277
  )
278
278
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
279
279
  pd_df.columns = dataset.columns
@@ -308,7 +308,7 @@ class MultiTaskLassoCV(BaseTransformer):
308
308
  inspect.currentframe(), MultiTaskLassoCV.__class__.__name__
309
309
  ),
310
310
  api_calls=[Session.call],
311
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
311
+ custom_tags={"autogen": True} if self._autogenerated else None,
312
312
  )
313
313
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
314
314
  pd_df.columns = dataset.columns
@@ -256,7 +256,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
256
256
  inspect.currentframe(), OrthogonalMatchingPursuit.__class__.__name__
257
257
  ),
258
258
  api_calls=[Session.call],
259
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
259
+ custom_tags={"autogen": True} if self._autogenerated else None,
260
260
  )
261
261
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
262
262
  pd_df.columns = dataset.columns
@@ -330,7 +330,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
330
330
  inspect.currentframe(), PassiveAggressiveClassifier.__class__.__name__
331
331
  ),
332
332
  api_calls=[Session.call],
333
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
333
+ custom_tags={"autogen": True} if self._autogenerated else None,
334
334
  )
335
335
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
336
336
  pd_df.columns = dataset.columns
@@ -316,7 +316,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
316
316
  inspect.currentframe(), PassiveAggressiveRegressor.__class__.__name__
317
317
  ),
318
318
  api_calls=[Session.call],
319
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
319
+ custom_tags={"autogen": True} if self._autogenerated else None,
320
320
  )
321
321
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
322
322
  pd_df.columns = dataset.columns
@@ -329,7 +329,7 @@ class Perceptron(BaseTransformer):
329
329
  inspect.currentframe(), Perceptron.__class__.__name__
330
330
  ),
331
331
  api_calls=[Session.call],
332
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
332
+ custom_tags={"autogen": True} if self._autogenerated else None,
333
333
  )
334
334
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
335
335
  pd_df.columns = dataset.columns
@@ -278,7 +278,7 @@ class PoissonRegressor(BaseTransformer):
278
278
  inspect.currentframe(), PoissonRegressor.__class__.__name__
279
279
  ),
280
280
  api_calls=[Session.call],
281
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
281
+ custom_tags={"autogen": True} if self._autogenerated else None,
282
282
  )
283
283
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
284
284
  pd_df.columns = dataset.columns
@@ -334,7 +334,7 @@ class RANSACRegressor(BaseTransformer):
334
334
  inspect.currentframe(), RANSACRegressor.__class__.__name__
335
335
  ),
336
336
  api_calls=[Session.call],
337
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
337
+ custom_tags={"autogen": True} if self._autogenerated else None,
338
338
  )
339
339
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
340
340
  pd_df.columns = dataset.columns
@@ -326,7 +326,7 @@ class Ridge(BaseTransformer):
326
326
  inspect.currentframe(), Ridge.__class__.__name__
327
327
  ),
328
328
  api_calls=[Session.call],
329
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
329
+ custom_tags={"autogen": True} if self._autogenerated else None,
330
330
  )
331
331
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
332
332
  pd_df.columns = dataset.columns
@@ -326,7 +326,7 @@ class RidgeClassifier(BaseTransformer):
326
326
  inspect.currentframe(), RidgeClassifier.__class__.__name__
327
327
  ),
328
328
  api_calls=[Session.call],
329
- custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
329
+ custom_tags={"autogen": True} if self._autogenerated else None,
330
330
  )
331
331
  pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
332
332
  pd_df.columns = dataset.columns