snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. snowflake/ml/_internal/env_utils.py +77 -32
  2. snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
  3. snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
  4. snowflake/ml/_internal/exceptions/error_codes.py +3 -0
  5. snowflake/ml/_internal/lineage/data_source.py +10 -0
  6. snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
  7. snowflake/ml/_internal/utils/identifier.py +3 -1
  8. snowflake/ml/_internal/utils/sql_identifier.py +2 -6
  9. snowflake/ml/dataset/__init__.py +10 -0
  10. snowflake/ml/dataset/dataset.py +454 -129
  11. snowflake/ml/dataset/dataset_factory.py +53 -0
  12. snowflake/ml/dataset/dataset_metadata.py +103 -0
  13. snowflake/ml/dataset/dataset_reader.py +202 -0
  14. snowflake/ml/feature_store/feature_store.py +531 -332
  15. snowflake/ml/feature_store/feature_view.py +40 -23
  16. snowflake/ml/fileset/embedded_stage_fs.py +146 -0
  17. snowflake/ml/fileset/sfcfs.py +56 -54
  18. snowflake/ml/fileset/snowfs.py +159 -0
  19. snowflake/ml/fileset/stage_fs.py +49 -17
  20. snowflake/ml/model/__init__.py +2 -2
  21. snowflake/ml/model/_api.py +16 -1
  22. snowflake/ml/model/_client/model/model_impl.py +27 -0
  23. snowflake/ml/model/_client/model/model_version_impl.py +137 -50
  24. snowflake/ml/model/_client/ops/model_ops.py +159 -40
  25. snowflake/ml/model/_client/sql/model.py +25 -2
  26. snowflake/ml/model/_client/sql/model_version.py +131 -2
  27. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
  28. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
  29. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  30. snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
  31. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
  32. snowflake/ml/model/_model_composer/model_composer.py +22 -1
  33. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
  34. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
  35. snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
  36. snowflake/ml/model/_packager/model_env/model_env.py +41 -0
  37. snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
  38. snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
  39. snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
  40. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  41. snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
  42. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
  43. snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
  44. snowflake/ml/model/_packager/model_packager.py +2 -5
  45. snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
  46. snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
  47. snowflake/ml/model/type_hints.py +21 -2
  48. snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
  49. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
  50. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
  51. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
  52. snowflake/ml/modeling/_internal/model_trainer.py +7 -0
  53. snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
  54. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
  55. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
  56. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
  57. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
  58. snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
  59. snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
  60. snowflake/ml/modeling/cluster/birch.py +248 -175
  61. snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
  62. snowflake/ml/modeling/cluster/dbscan.py +246 -175
  63. snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
  64. snowflake/ml/modeling/cluster/k_means.py +248 -175
  65. snowflake/ml/modeling/cluster/mean_shift.py +246 -175
  66. snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
  67. snowflake/ml/modeling/cluster/optics.py +246 -175
  68. snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
  69. snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
  70. snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
  71. snowflake/ml/modeling/compose/column_transformer.py +248 -175
  72. snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
  73. snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
  74. snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
  75. snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
  76. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
  77. snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
  78. snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
  79. snowflake/ml/modeling/covariance/oas.py +246 -175
  80. snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
  81. snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
  82. snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
  83. snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
  84. snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
  85. snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
  86. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
  87. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
  88. snowflake/ml/modeling/decomposition/pca.py +248 -175
  89. snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
  90. snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
  91. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
  92. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
  93. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
  94. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
  95. snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
  96. snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
  97. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
  98. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
  99. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
  100. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
  101. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
  102. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
  103. snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
  104. snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
  105. snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
  106. snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
  107. snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
  108. snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
  109. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
  110. snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
  111. snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
  112. snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
  113. snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
  114. snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
  115. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
  116. snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
  117. snowflake/ml/modeling/framework/_utils.py +8 -1
  118. snowflake/ml/modeling/framework/base.py +72 -37
  119. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
  120. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
  121. snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
  122. snowflake/ml/modeling/impute/knn_imputer.py +248 -175
  123. snowflake/ml/modeling/impute/missing_indicator.py +248 -175
  124. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
  125. snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
  126. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
  127. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
  128. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
  129. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
  130. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
  131. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
  132. snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
  133. snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
  134. snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
  135. snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
  136. snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
  137. snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
  138. snowflake/ml/modeling/linear_model/lars.py +246 -175
  139. snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
  140. snowflake/ml/modeling/linear_model/lasso.py +246 -175
  141. snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
  142. snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
  143. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
  144. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
  145. snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
  146. snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
  147. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
  148. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
  149. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
  150. snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
  151. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
  152. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
  153. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
  154. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
  155. snowflake/ml/modeling/linear_model/perceptron.py +246 -175
  156. snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
  157. snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
  158. snowflake/ml/modeling/linear_model/ridge.py +246 -175
  159. snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
  160. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
  161. snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
  162. snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
  163. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
  164. snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
  165. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
  166. snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
  167. snowflake/ml/modeling/manifold/isomap.py +248 -175
  168. snowflake/ml/modeling/manifold/mds.py +248 -175
  169. snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
  170. snowflake/ml/modeling/manifold/tsne.py +248 -175
  171. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
  172. snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
  173. snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
  174. snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
  175. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
  176. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
  177. snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
  178. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
  179. snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
  180. snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
  181. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
  182. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
  183. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
  184. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
  185. snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
  186. snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
  187. snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
  188. snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
  189. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
  190. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
  191. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
  192. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
  193. snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
  194. snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
  195. snowflake/ml/modeling/pipeline/pipeline.py +517 -35
  196. snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
  197. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
  198. snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
  199. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
  200. snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
  201. snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
  202. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
  203. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
  204. snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
  205. snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
  206. snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
  207. snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
  208. snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
  209. snowflake/ml/modeling/svm/linear_svc.py +246 -175
  210. snowflake/ml/modeling/svm/linear_svr.py +246 -175
  211. snowflake/ml/modeling/svm/nu_svc.py +246 -175
  212. snowflake/ml/modeling/svm/nu_svr.py +246 -175
  213. snowflake/ml/modeling/svm/svc.py +246 -175
  214. snowflake/ml/modeling/svm/svr.py +246 -175
  215. snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
  216. snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
  217. snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
  218. snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
  219. snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
  220. snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
  221. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
  222. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
  223. snowflake/ml/registry/model_registry.py +3 -149
  224. snowflake/ml/registry/registry.py +1 -1
  225. snowflake/ml/version.py +1 -1
  226. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
  227. snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
  228. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
  229. snowflake/ml/registry/_artifact_manager.py +0 -156
  230. snowflake/ml/registry/artifact.py +0 -46
  231. snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
  232. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
  233. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
  234. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
33
33
  BatchInferenceKwargsTypedDict,
34
34
  ScoreKwargsTypedDict
35
35
  )
36
+ from snowflake.ml.model._signatures import utils as model_signature_utils
37
+ from snowflake.ml.model.model_signature import (
38
+ BaseFeatureSpec,
39
+ DataType,
40
+ FeatureSpec,
41
+ ModelSignature,
42
+ _infer_signature,
43
+ _rename_signature_with_snowflake_identifiers,
44
+ )
36
45
 
37
46
  from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
38
47
 
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
43
52
  validate_sklearn_args,
44
53
  )
45
54
 
46
- from snowflake.ml.model.model_signature import (
47
- DataType,
48
- FeatureSpec,
49
- ModelSignature,
50
- _infer_signature,
51
- _rename_signature_with_snowflake_identifiers,
52
- BaseFeatureSpec,
53
- )
54
- from snowflake.ml.model._signatures import utils as model_signature_utils
55
-
56
55
  _PROJECT = "ModelDevelopment"
57
56
  # Derive subproject from module name by removing "sklearn"
58
57
  # and converting module name from underscore to CamelCase
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
61
60
 
62
61
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
63
62
 
64
- def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
65
- def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
66
- return False and callable(getattr(self._sklearn_object, "fit_transform", None))
67
- return check
68
-
69
-
70
63
  class GradientBoostingClassifier(BaseTransformer):
71
64
  r"""Gradient Boosting for classification
72
65
  For more details on this class, see [sklearn.ensemble.GradientBoostingClassifier]
@@ -391,12 +384,7 @@ class GradientBoostingClassifier(BaseTransformer):
391
384
  )
392
385
  return selected_cols
393
386
 
394
- @telemetry.send_api_usage_telemetry(
395
- project=_PROJECT,
396
- subproject=_SUBPROJECT,
397
- custom_tags=dict([("autogen", True)]),
398
- )
399
- def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GradientBoostingClassifier":
387
+ def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GradientBoostingClassifier":
400
388
  """Fit the gradient boosting model
401
389
  For more details on this function, see [sklearn.ensemble.GradientBoostingClassifier.fit]
402
390
  (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html#sklearn.ensemble.GradientBoostingClassifier.fit)
@@ -423,12 +411,14 @@ class GradientBoostingClassifier(BaseTransformer):
423
411
 
424
412
  self._snowpark_cols = dataset.select(self.input_cols).columns
425
413
 
426
- # If we are already in a stored procedure, no need to kick off another one.
414
+ # If we are already in a stored procedure, no need to kick off another one.
427
415
  if SNOWML_SPROC_ENV in os.environ:
428
416
  statement_params = telemetry.get_function_usage_statement_params(
429
417
  project=_PROJECT,
430
418
  subproject=_SUBPROJECT,
431
- function_name=telemetry.get_statement_params_full_func_name(inspect.currentframe(), GradientBoostingClassifier.__class__.__name__),
419
+ function_name=telemetry.get_statement_params_full_func_name(
420
+ inspect.currentframe(), GradientBoostingClassifier.__class__.__name__
421
+ ),
432
422
  api_calls=[Session.call],
433
423
  custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
434
424
  )
@@ -449,27 +439,24 @@ class GradientBoostingClassifier(BaseTransformer):
449
439
  )
450
440
  self._sklearn_object = model_trainer.train()
451
441
  self._is_fitted = True
452
- self._get_model_signatures(dataset)
442
+ self._generate_model_signatures(dataset)
453
443
  return self
454
444
 
455
445
  def _batch_inference_validate_snowpark(
456
446
  self,
457
447
  dataset: DataFrame,
458
448
  inference_method: str,
459
- ) -> List[str]:
460
- """Util method to run validate that batch inference can be run on a snowpark dataframe and
461
- return the available package that exists in the snowflake anaconda channel
449
+ ) -> None:
450
+ """Util method to run validate that batch inference can be run on a snowpark dataframe.
462
451
 
463
452
  Args:
464
453
  dataset: snowpark dataframe
465
454
  inference_method: the inference method such as predict, score...
466
-
455
+
467
456
  Raises:
468
457
  SnowflakeMLException: If the estimator is not fitted, raise error
469
458
  SnowflakeMLException: If the session is None, raise error
470
459
 
471
- Returns:
472
- A list of available package that exists in the snowflake anaconda channel
473
460
  """
474
461
  if not self._is_fitted:
475
462
  raise exceptions.SnowflakeMLException(
@@ -487,9 +474,7 @@ class GradientBoostingClassifier(BaseTransformer):
487
474
  "Session must not specified for snowpark dataset."
488
475
  ),
489
476
  )
490
- # Validate that key package version in user workspace are supported in snowflake conda channel
491
- return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
492
- pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
477
+
493
478
 
494
479
  @available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
495
480
  @telemetry.send_api_usage_telemetry(
@@ -525,7 +510,9 @@ class GradientBoostingClassifier(BaseTransformer):
525
510
  # when it is classifier, infer the datatype from label columns
526
511
  if expected_type_inferred == "" and 'predict' in self.model_signatures:
527
512
  # Batch inference takes a single expected output column type. Use the first columns type for now.
528
- label_cols_signatures = [row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols]
513
+ label_cols_signatures = [
514
+ row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
515
+ ]
529
516
  if len(label_cols_signatures) == 0:
530
517
  error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
531
518
  raise exceptions.SnowflakeMLException(
@@ -533,25 +520,23 @@ class GradientBoostingClassifier(BaseTransformer):
533
520
  original_exception=ValueError(error_str),
534
521
  )
535
522
 
536
- expected_type_inferred = convert_sp_to_sf_type(
537
- label_cols_signatures[0].as_snowpark_type()
538
- )
523
+ expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
539
524
 
540
- self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
541
- assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
525
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
526
+ self._deps = self._get_dependencies()
527
+ assert isinstance(
528
+ dataset._session, Session
529
+ ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
542
530
 
543
531
  transform_kwargs = dict(
544
- session = dataset._session,
545
- dependencies = self._deps,
546
- drop_input_cols = self._drop_input_cols,
547
- expected_output_cols_type = expected_type_inferred,
532
+ session=dataset._session,
533
+ dependencies=self._deps,
534
+ drop_input_cols=self._drop_input_cols,
535
+ expected_output_cols_type=expected_type_inferred,
548
536
  )
549
537
 
550
538
  elif isinstance(dataset, pd.DataFrame):
551
- transform_kwargs = dict(
552
- snowpark_input_cols = self._snowpark_cols,
553
- drop_input_cols = self._drop_input_cols
554
- )
539
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
555
540
 
556
541
  transform_handlers = ModelTransformerBuilder.build(
557
542
  dataset=dataset,
@@ -591,7 +576,7 @@ class GradientBoostingClassifier(BaseTransformer):
591
576
  Transformed dataset.
592
577
  """
593
578
  super()._check_dataset_type(dataset)
594
- inference_method="transform"
579
+ inference_method = "transform"
595
580
 
596
581
  # This dictionary contains optional kwargs for batch inference. These kwargs
597
582
  # are specific to the type of dataset used.
@@ -621,24 +606,19 @@ class GradientBoostingClassifier(BaseTransformer):
621
606
  if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
622
607
  expected_dtype = convert_sp_to_sf_type(output_types[0])
623
608
 
624
- self._deps = self._batch_inference_validate_snowpark(
625
- dataset=dataset,
626
- inference_method=inference_method,
627
- )
609
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
610
+ self._deps = self._get_dependencies()
628
611
  assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
629
612
 
630
613
  transform_kwargs = dict(
631
- session = dataset._session,
632
- dependencies = self._deps,
633
- drop_input_cols = self._drop_input_cols,
634
- expected_output_cols_type = expected_dtype,
614
+ session=dataset._session,
615
+ dependencies=self._deps,
616
+ drop_input_cols=self._drop_input_cols,
617
+ expected_output_cols_type=expected_dtype,
635
618
  )
636
619
 
637
620
  elif isinstance(dataset, pd.DataFrame):
638
- transform_kwargs = dict(
639
- snowpark_input_cols = self._snowpark_cols,
640
- drop_input_cols = self._drop_input_cols
641
- )
621
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
642
622
 
643
623
  transform_handlers = ModelTransformerBuilder.build(
644
624
  dataset=dataset,
@@ -657,7 +637,11 @@ class GradientBoostingClassifier(BaseTransformer):
657
637
  return output_df
658
638
 
659
639
  @available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
660
- def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_predict_",) -> Union[DataFrame, pd.DataFrame]:
640
+ def fit_predict(
641
+ self,
642
+ dataset: Union[DataFrame, pd.DataFrame],
643
+ output_cols_prefix: str = "fit_predict_",
644
+ ) -> Union[DataFrame, pd.DataFrame]:
661
645
  """ Method not supported for this class.
662
646
 
663
647
 
@@ -682,22 +666,104 @@ class GradientBoostingClassifier(BaseTransformer):
682
666
  )
683
667
  output_result, fitted_estimator = model_trainer.train_fit_predict(
684
668
  drop_input_cols=self._drop_input_cols,
685
- expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
669
+ expected_output_cols_list=(
670
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
671
+ ),
686
672
  )
687
673
  self._sklearn_object = fitted_estimator
688
674
  self._is_fitted = True
689
675
  return output_result
690
676
 
677
+
678
+ @available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
679
+ def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
680
+ """ Method not supported for this class.
681
+
691
682
 
692
- @available_if(_is_fit_transform_method_enabled()) # type: ignore[misc]
693
- def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
694
- """
683
+ Raises:
684
+ TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
685
+
686
+ Args:
687
+ dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
688
+ Snowpark or Pandas DataFrame.
689
+ output_cols_prefix: Prefix for the response columns
695
690
  Returns:
696
691
  Transformed dataset.
697
692
  """
698
- self.fit(dataset)
699
- assert self._sklearn_object is not None
700
- return self._sklearn_object.embedding_
693
+ self._infer_input_output_cols(dataset)
694
+ super()._check_dataset_type(dataset)
695
+ model_trainer = ModelTrainerBuilder.build_fit_transform(
696
+ estimator=self._sklearn_object,
697
+ dataset=dataset,
698
+ input_cols=self.input_cols,
699
+ label_cols=self.label_cols,
700
+ sample_weight_col=self.sample_weight_col,
701
+ autogenerated=self._autogenerated,
702
+ subproject=_SUBPROJECT,
703
+ )
704
+ output_result, fitted_estimator = model_trainer.train_fit_transform(
705
+ drop_input_cols=self._drop_input_cols,
706
+ expected_output_cols_list=self.output_cols,
707
+ )
708
+ self._sklearn_object = fitted_estimator
709
+ self._is_fitted = True
710
+ return output_result
711
+
712
+
713
+ def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
714
+ """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
715
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
716
+ """
717
+ output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
718
+ # The following condition is introduced for kneighbors methods, and not used in other methods
719
+ if output_cols:
720
+ output_cols = [
721
+ identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
722
+ for c in output_cols
723
+ ]
724
+ elif getattr(self._sklearn_object, "classes_", None) is None:
725
+ output_cols = [output_cols_prefix]
726
+ elif self._sklearn_object is not None:
727
+ classes = self._sklearn_object.classes_
728
+ if isinstance(classes, numpy.ndarray):
729
+ output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
730
+ elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
731
+ # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
732
+ output_cols = []
733
+ for i, cl in enumerate(classes):
734
+ # For binary classification, there is only one output column for each class
735
+ # ndarray as the two classes are complementary.
736
+ if len(cl) == 2:
737
+ output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
738
+ else:
739
+ output_cols.extend([
740
+ f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
741
+ ])
742
+ else:
743
+ output_cols = []
744
+
745
+ # Make sure column names are valid snowflake identifiers.
746
+ assert output_cols is not None # Make MyPy happy
747
+ rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
748
+
749
+ return rv
750
+
751
+ def _align_expected_output_names(
752
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
753
+ ) -> List[str]:
754
+ # in case the inferred output column names dimension is different
755
+ # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
756
+ output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
757
+ output_df_columns = list(output_df_pd.columns)
758
+ output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
759
+ if self.sample_weight_col:
760
+ output_df_columns_set -= set(self.sample_weight_col)
761
+ # if the dimension of inferred output column names is correct; use it
762
+ if len(expected_output_cols_list) == len(output_df_columns_set):
763
+ return expected_output_cols_list
764
+ # otherwise, use the sklearn estimator's output
765
+ else:
766
+ return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
701
767
 
702
768
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
703
769
  @telemetry.send_api_usage_telemetry(
@@ -731,24 +797,26 @@ class GradientBoostingClassifier(BaseTransformer):
731
797
  # are specific to the type of dataset used.
732
798
  transform_kwargs: BatchInferenceKwargsTypedDict = dict()
733
799
 
800
+ expected_output_cols = self._get_output_column_names(output_cols_prefix)
801
+
734
802
  if isinstance(dataset, DataFrame):
735
- self._deps = self._batch_inference_validate_snowpark(
736
- dataset=dataset,
737
- inference_method=inference_method,
738
- )
739
- assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
803
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
804
+ self._deps = self._get_dependencies()
805
+ assert isinstance(
806
+ dataset._session, Session
807
+ ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
740
808
  transform_kwargs = dict(
741
809
  session=dataset._session,
742
810
  dependencies=self._deps,
743
- drop_input_cols = self._drop_input_cols,
811
+ drop_input_cols=self._drop_input_cols,
744
812
  expected_output_cols_type="float",
745
813
  )
814
+ expected_output_cols = self._align_expected_output_names(
815
+ inference_method, dataset, expected_output_cols, output_cols_prefix
816
+ )
746
817
 
747
818
  elif isinstance(dataset, pd.DataFrame):
748
- transform_kwargs = dict(
749
- snowpark_input_cols = self._snowpark_cols,
750
- drop_input_cols = self._drop_input_cols
751
- )
819
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
752
820
 
753
821
  transform_handlers = ModelTransformerBuilder.build(
754
822
  dataset=dataset,
@@ -760,7 +828,7 @@ class GradientBoostingClassifier(BaseTransformer):
760
828
  output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
761
829
  inference_method=inference_method,
762
830
  input_cols=self.input_cols,
763
- expected_output_cols=self._get_output_column_names(output_cols_prefix),
831
+ expected_output_cols=expected_output_cols,
764
832
  **transform_kwargs
765
833
  )
766
834
  return output_df
@@ -792,29 +860,30 @@ class GradientBoostingClassifier(BaseTransformer):
792
860
  Output dataset with log probability of the sample for each class in the model.
793
861
  """
794
862
  super()._check_dataset_type(dataset)
795
- inference_method="predict_log_proba"
863
+ inference_method = "predict_log_proba"
864
+ expected_output_cols = self._get_output_column_names(output_cols_prefix)
796
865
 
797
866
  # This dictionary contains optional kwargs for batch inference. These kwargs
798
867
  # are specific to the type of dataset used.
799
868
  transform_kwargs: BatchInferenceKwargsTypedDict = dict()
800
869
 
801
870
  if isinstance(dataset, DataFrame):
802
- self._deps = self._batch_inference_validate_snowpark(
803
- dataset=dataset,
804
- inference_method=inference_method,
805
- )
806
- assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
871
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
872
+ self._deps = self._get_dependencies()
873
+ assert isinstance(
874
+ dataset._session, Session
875
+ ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
807
876
  transform_kwargs = dict(
808
877
  session=dataset._session,
809
878
  dependencies=self._deps,
810
- drop_input_cols = self._drop_input_cols,
879
+ drop_input_cols=self._drop_input_cols,
811
880
  expected_output_cols_type="float",
812
881
  )
882
+ expected_output_cols = self._align_expected_output_names(
883
+ inference_method, dataset, expected_output_cols, output_cols_prefix
884
+ )
813
885
  elif isinstance(dataset, pd.DataFrame):
814
- transform_kwargs = dict(
815
- snowpark_input_cols = self._snowpark_cols,
816
- drop_input_cols = self._drop_input_cols
817
- )
886
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
818
887
 
819
888
  transform_handlers = ModelTransformerBuilder.build(
820
889
  dataset=dataset,
@@ -827,7 +896,7 @@ class GradientBoostingClassifier(BaseTransformer):
827
896
  output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
828
897
  inference_method=inference_method,
829
898
  input_cols=self.input_cols,
830
- expected_output_cols=self._get_output_column_names(output_cols_prefix),
899
+ expected_output_cols=expected_output_cols,
831
900
  **transform_kwargs
832
901
  )
833
902
  return output_df
@@ -855,30 +924,32 @@ class GradientBoostingClassifier(BaseTransformer):
855
924
  Output dataset with results of the decision function for the samples in input dataset.
856
925
  """
857
926
  super()._check_dataset_type(dataset)
858
- inference_method="decision_function"
927
+ inference_method = "decision_function"
859
928
 
860
929
  # This dictionary contains optional kwargs for batch inference. These kwargs
861
930
  # are specific to the type of dataset used.
862
931
  transform_kwargs: BatchInferenceKwargsTypedDict = dict()
863
932
 
933
+ expected_output_cols = self._get_output_column_names(output_cols_prefix)
934
+
864
935
  if isinstance(dataset, DataFrame):
865
- self._deps = self._batch_inference_validate_snowpark(
866
- dataset=dataset,
867
- inference_method=inference_method,
868
- )
869
- assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
936
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
937
+ self._deps = self._get_dependencies()
938
+ assert isinstance(
939
+ dataset._session, Session
940
+ ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
870
941
  transform_kwargs = dict(
871
942
  session=dataset._session,
872
943
  dependencies=self._deps,
873
- drop_input_cols = self._drop_input_cols,
944
+ drop_input_cols=self._drop_input_cols,
874
945
  expected_output_cols_type="float",
875
946
  )
947
+ expected_output_cols = self._align_expected_output_names(
948
+ inference_method, dataset, expected_output_cols, output_cols_prefix
949
+ )
876
950
 
877
951
  elif isinstance(dataset, pd.DataFrame):
878
- transform_kwargs = dict(
879
- snowpark_input_cols = self._snowpark_cols,
880
- drop_input_cols = self._drop_input_cols
881
- )
952
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
882
953
 
883
954
  transform_handlers = ModelTransformerBuilder.build(
884
955
  dataset=dataset,
@@ -891,7 +962,7 @@ class GradientBoostingClassifier(BaseTransformer):
891
962
  output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
892
963
  inference_method=inference_method,
893
964
  input_cols=self.input_cols,
894
- expected_output_cols=self._get_output_column_names(output_cols_prefix),
965
+ expected_output_cols=expected_output_cols,
895
966
  **transform_kwargs
896
967
  )
897
968
  return output_df
@@ -920,17 +991,17 @@ class GradientBoostingClassifier(BaseTransformer):
920
991
  Output dataset with probability of the sample for each class in the model.
921
992
  """
922
993
  super()._check_dataset_type(dataset)
923
- inference_method="score_samples"
994
+ inference_method = "score_samples"
924
995
 
925
996
  # This dictionary contains optional kwargs for batch inference. These kwargs
926
997
  # are specific to the type of dataset used.
927
998
  transform_kwargs: BatchInferenceKwargsTypedDict = dict()
928
999
 
1000
+ expected_output_cols = self._get_output_column_names(output_cols_prefix)
1001
+
929
1002
  if isinstance(dataset, DataFrame):
930
- self._deps = self._batch_inference_validate_snowpark(
931
- dataset=dataset,
932
- inference_method=inference_method,
933
- )
1003
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
1004
+ self._deps = self._get_dependencies()
934
1005
  assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
935
1006
  transform_kwargs = dict(
936
1007
  session=dataset._session,
@@ -938,6 +1009,9 @@ class GradientBoostingClassifier(BaseTransformer):
938
1009
  drop_input_cols = self._drop_input_cols,
939
1010
  expected_output_cols_type="float",
940
1011
  )
1012
+ expected_output_cols = self._align_expected_output_names(
1013
+ inference_method, dataset, expected_output_cols, output_cols_prefix
1014
+ )
941
1015
 
942
1016
  elif isinstance(dataset, pd.DataFrame):
943
1017
  transform_kwargs = dict(
@@ -956,7 +1030,7 @@ class GradientBoostingClassifier(BaseTransformer):
956
1030
  output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
957
1031
  inference_method=inference_method,
958
1032
  input_cols=self.input_cols,
959
- expected_output_cols=self._get_output_column_names(output_cols_prefix),
1033
+ expected_output_cols=expected_output_cols,
960
1034
  **transform_kwargs
961
1035
  )
962
1036
  return output_df
@@ -991,17 +1065,15 @@ class GradientBoostingClassifier(BaseTransformer):
991
1065
  transform_kwargs: ScoreKwargsTypedDict = dict()
992
1066
 
993
1067
  if isinstance(dataset, DataFrame):
994
- self._deps = self._batch_inference_validate_snowpark(
995
- dataset=dataset,
996
- inference_method="score",
997
- )
1068
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
1069
+ self._deps = self._get_dependencies()
998
1070
  selected_cols = self._get_active_columns()
999
1071
  if len(selected_cols) > 0:
1000
1072
  dataset = dataset.select(selected_cols)
1001
1073
  assert isinstance(dataset._session, Session) # keep mypy happy
1002
1074
  transform_kwargs = dict(
1003
1075
  session=dataset._session,
1004
- dependencies=["snowflake-snowpark-python"] + self._deps,
1076
+ dependencies=self._deps,
1005
1077
  score_sproc_imports=['sklearn'],
1006
1078
  )
1007
1079
  elif isinstance(dataset, pd.DataFrame):
@@ -1066,11 +1138,8 @@ class GradientBoostingClassifier(BaseTransformer):
1066
1138
 
1067
1139
  if isinstance(dataset, DataFrame):
1068
1140
 
1069
- self._deps = self._batch_inference_validate_snowpark(
1070
- dataset=dataset,
1071
- inference_method=inference_method,
1072
-
1073
- )
1141
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
1142
+ self._deps = self._get_dependencies()
1074
1143
  assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
1075
1144
  transform_kwargs = dict(
1076
1145
  session = dataset._session,
@@ -1103,50 +1172,84 @@ class GradientBoostingClassifier(BaseTransformer):
1103
1172
  )
1104
1173
  return output_df
1105
1174
 
1175
+
1176
+
1177
def to_sklearn(self) -> Any:
    """Get sklearn.ensemble.GradientBoostingClassifier object.

    The underlying object is created through ``_create_sklearn_object()`` the first
    time it is requested and stored on the instance for later calls.
    """
    estimator = self._sklearn_object
    if estimator is None:
        estimator = self._create_sklearn_object()
        self._sklearn_object = estimator
    return estimator
1183
+
1184
def to_xgboost(self) -> Any:
    """Always raise: this estimator cannot be converted with ``to_xgboost()``.

    Raises:
        SnowflakeMLException: With error code ``METHOD_NOT_ALLOWED``, wrapping an
            ``AttributeError`` whose message is the ``UNSUPPORTED_MODEL_CONVERSION``
            template formatted with "to_xgboost()" and "to_sklearn()".
    """
    message = modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format("to_xgboost()", "to_sklearn()")
    raise exceptions.SnowflakeMLException(
        error_code=error_codes.METHOD_NOT_ALLOWED,
        original_exception=AttributeError(message),
    )
1194
+
1195
def to_lightgbm(self) -> Any:
    """Always raise: this estimator cannot be converted with ``to_lightgbm()``.

    Raises:
        SnowflakeMLException: With error code ``METHOD_NOT_ALLOWED``, wrapping an
            ``AttributeError`` whose message is the ``UNSUPPORTED_MODEL_CONVERSION``
            template formatted with "to_lightgbm()" and "to_sklearn()".
    """
    message = modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format("to_lightgbm()", "to_sklearn()")
    raise exceptions.SnowflakeMLException(
        error_code=error_codes.METHOD_NOT_ALLOWED,
        original_exception=AttributeError(message),
    )
1205
+
1206
+ def _get_dependencies(self) -> List[str]:
1207
+ return self._deps
1208
+
1106
1209
 
1107
- def _get_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
1210
+ def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
1108
1211
  self._model_signature_dict = dict()
1109
1212
 
1110
1213
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1111
1214
 
1112
- inputs = list(_infer_signature(dataset[self.input_cols], "input"))
1215
+ inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1113
1216
  outputs: List[BaseFeatureSpec] = []
1114
1217
  if hasattr(self, "predict"):
1115
1218
  # keep mypy happy
1116
- assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
1219
+ assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
1117
1220
  # For classifier, the type of predict is the same as the type of label
1118
- if self._sklearn_object._estimator_type == 'classifier':
1119
- # label columns is the desired type for output
1221
+ if self._sklearn_object._estimator_type == "classifier":
1222
+ # label columns is the desired type for output
1120
1223
  outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1121
1224
  # rename the output columns
1122
1225
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1123
- self._model_signature_dict["predict"] = ModelSignature(inputs,
1124
- ([] if self._drop_input_cols else inputs)
1125
- + outputs)
1226
+ self._model_signature_dict["predict"] = ModelSignature(
1227
+ inputs, ([] if self._drop_input_cols else inputs) + outputs
1228
+ )
1126
1229
  # For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
1127
1230
  # For outlier models, returns -1 for outliers and 1 for inliers.
1128
- # Clusterer returns int64 cluster labels.
1231
+ # Clusterer returns int64 cluster labels.
1129
1232
  elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
1130
1233
  outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
1131
- self._model_signature_dict["predict"] = ModelSignature(inputs,
1132
- ([] if self._drop_input_cols else inputs)
1133
- + outputs)
1134
-
1234
+ self._model_signature_dict["predict"] = ModelSignature(
1235
+ inputs, ([] if self._drop_input_cols else inputs) + outputs
1236
+ )
1237
+
1135
1238
  # For regressor, the type of predict is float64
1136
- elif self._sklearn_object._estimator_type == 'regressor':
1239
+ elif self._sklearn_object._estimator_type == "regressor":
1137
1240
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1138
- self._model_signature_dict["predict"] = ModelSignature(inputs,
1139
- ([] if self._drop_input_cols else inputs)
1140
- + outputs)
1141
-
1241
+ self._model_signature_dict["predict"] = ModelSignature(
1242
+ inputs, ([] if self._drop_input_cols else inputs) + outputs
1243
+ )
1244
+
1142
1245
  for prob_func in PROB_FUNCTIONS:
1143
1246
  if hasattr(self, prob_func):
1144
1247
  output_cols_prefix: str = f"{prob_func}_"
1145
1248
  output_column_names = self._get_output_column_names(output_cols_prefix)
1146
1249
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1147
- self._model_signature_dict[prob_func] = ModelSignature(inputs,
1148
- ([] if self._drop_input_cols else inputs)
1149
- + outputs)
1250
+ self._model_signature_dict[prob_func] = ModelSignature(
1251
+ inputs, ([] if self._drop_input_cols else inputs) + outputs
1252
+ )
1150
1253
 
1151
1254
  # Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
1152
1255
  items = list(self._model_signature_dict.items())
@@ -1159,10 +1262,10 @@ class GradientBoostingClassifier(BaseTransformer):
1159
1262
  """Returns model signature of current class.
1160
1263
 
1161
1264
  Raises:
1162
- exceptions.SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
1265
+ SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
1163
1266
 
1164
1267
  Returns:
1165
- Dict[str, ModelSignature]: each method and its input output signature
1268
+ Dict with each method and its input output signature
1166
1269
  """
1167
1270
  if self._model_signature_dict is None:
1168
1271
  raise exceptions.SnowflakeMLException(
@@ -1170,35 +1273,3 @@ class GradientBoostingClassifier(BaseTransformer):
1170
1273
  original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
1171
1274
  )
1172
1275
  return self._model_signature_dict
1173
-
1174
- def to_sklearn(self) -> Any:
1175
- """Get sklearn.ensemble.GradientBoostingClassifier object.
1176
- """
1177
- if self._sklearn_object is None:
1178
- self._sklearn_object = self._create_sklearn_object()
1179
- return self._sklearn_object
1180
-
1181
- def to_xgboost(self) -> Any:
1182
- raise exceptions.SnowflakeMLException(
1183
- error_code=error_codes.METHOD_NOT_ALLOWED,
1184
- original_exception=AttributeError(
1185
- modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
1186
- "to_xgboost()",
1187
- "to_sklearn()"
1188
- )
1189
- ),
1190
- )
1191
-
1192
- def to_lightgbm(self) -> Any:
1193
- raise exceptions.SnowflakeMLException(
1194
- error_code=error_codes.METHOD_NOT_ALLOWED,
1195
- original_exception=AttributeError(
1196
- modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
1197
- "to_lightgbm()",
1198
- "to_sklearn()"
1199
- )
1200
- ),
1201
- )
1202
-
1203
- def _get_dependencies(self) -> List[str]:
1204
- return self._deps