snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. snowflake/ml/_internal/env_utils.py +77 -32
  2. snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
  3. snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
  4. snowflake/ml/_internal/exceptions/error_codes.py +3 -0
  5. snowflake/ml/_internal/lineage/data_source.py +10 -0
  6. snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
  7. snowflake/ml/_internal/utils/identifier.py +3 -1
  8. snowflake/ml/_internal/utils/sql_identifier.py +2 -6
  9. snowflake/ml/dataset/__init__.py +10 -0
  10. snowflake/ml/dataset/dataset.py +454 -129
  11. snowflake/ml/dataset/dataset_factory.py +53 -0
  12. snowflake/ml/dataset/dataset_metadata.py +103 -0
  13. snowflake/ml/dataset/dataset_reader.py +202 -0
  14. snowflake/ml/feature_store/feature_store.py +531 -332
  15. snowflake/ml/feature_store/feature_view.py +40 -23
  16. snowflake/ml/fileset/embedded_stage_fs.py +146 -0
  17. snowflake/ml/fileset/sfcfs.py +56 -54
  18. snowflake/ml/fileset/snowfs.py +159 -0
  19. snowflake/ml/fileset/stage_fs.py +49 -17
  20. snowflake/ml/model/__init__.py +2 -2
  21. snowflake/ml/model/_api.py +16 -1
  22. snowflake/ml/model/_client/model/model_impl.py +27 -0
  23. snowflake/ml/model/_client/model/model_version_impl.py +137 -50
  24. snowflake/ml/model/_client/ops/model_ops.py +159 -40
  25. snowflake/ml/model/_client/sql/model.py +25 -2
  26. snowflake/ml/model/_client/sql/model_version.py +131 -2
  27. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
  28. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
  29. snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
  30. snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
  31. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
  32. snowflake/ml/model/_model_composer/model_composer.py +22 -1
  33. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
  34. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
  35. snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
  36. snowflake/ml/model/_packager/model_env/model_env.py +41 -0
  37. snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
  38. snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
  39. snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
  40. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  41. snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
  42. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
  43. snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
  44. snowflake/ml/model/_packager/model_packager.py +2 -5
  45. snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
  46. snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
  47. snowflake/ml/model/type_hints.py +21 -2
  48. snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
  49. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
  50. snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
  51. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
  52. snowflake/ml/modeling/_internal/model_trainer.py +7 -0
  53. snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
  54. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
  55. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
  56. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
  57. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
  58. snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
  59. snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
  60. snowflake/ml/modeling/cluster/birch.py +248 -175
  61. snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
  62. snowflake/ml/modeling/cluster/dbscan.py +246 -175
  63. snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
  64. snowflake/ml/modeling/cluster/k_means.py +248 -175
  65. snowflake/ml/modeling/cluster/mean_shift.py +246 -175
  66. snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
  67. snowflake/ml/modeling/cluster/optics.py +246 -175
  68. snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
  69. snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
  70. snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
  71. snowflake/ml/modeling/compose/column_transformer.py +248 -175
  72. snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
  73. snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
  74. snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
  75. snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
  76. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
  77. snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
  78. snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
  79. snowflake/ml/modeling/covariance/oas.py +246 -175
  80. snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
  81. snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
  82. snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
  83. snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
  84. snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
  85. snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
  86. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
  87. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
  88. snowflake/ml/modeling/decomposition/pca.py +248 -175
  89. snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
  90. snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
  91. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
  92. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
  93. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
  94. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
  95. snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
  96. snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
  97. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
  98. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
  99. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
  100. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
  101. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
  102. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
  103. snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
  104. snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
  105. snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
  106. snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
  107. snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
  108. snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
  109. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
  110. snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
  111. snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
  112. snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
  113. snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
  114. snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
  115. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
  116. snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
  117. snowflake/ml/modeling/framework/_utils.py +8 -1
  118. snowflake/ml/modeling/framework/base.py +72 -37
  119. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
  120. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
  121. snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
  122. snowflake/ml/modeling/impute/knn_imputer.py +248 -175
  123. snowflake/ml/modeling/impute/missing_indicator.py +248 -175
  124. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
  125. snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
  126. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
  127. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
  128. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
  129. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
  130. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
  131. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
  132. snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
  133. snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
  134. snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
  135. snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
  136. snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
  137. snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
  138. snowflake/ml/modeling/linear_model/lars.py +246 -175
  139. snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
  140. snowflake/ml/modeling/linear_model/lasso.py +246 -175
  141. snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
  142. snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
  143. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
  144. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
  145. snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
  146. snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
  147. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
  148. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
  149. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
  150. snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
  151. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
  152. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
  153. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
  154. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
  155. snowflake/ml/modeling/linear_model/perceptron.py +246 -175
  156. snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
  157. snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
  158. snowflake/ml/modeling/linear_model/ridge.py +246 -175
  159. snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
  160. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
  161. snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
  162. snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
  163. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
  164. snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
  165. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
  166. snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
  167. snowflake/ml/modeling/manifold/isomap.py +248 -175
  168. snowflake/ml/modeling/manifold/mds.py +248 -175
  169. snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
  170. snowflake/ml/modeling/manifold/tsne.py +248 -175
  171. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
  172. snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
  173. snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
  174. snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
  175. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
  176. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
  177. snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
  178. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
  179. snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
  180. snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
  181. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
  182. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
  183. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
  184. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
  185. snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
  186. snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
  187. snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
  188. snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
  189. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
  190. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
  191. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
  192. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
  193. snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
  194. snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
  195. snowflake/ml/modeling/pipeline/pipeline.py +517 -35
  196. snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
  197. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
  198. snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
  199. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
  200. snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
  201. snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
  202. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
  203. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
  204. snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
  205. snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
  206. snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
  207. snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
  208. snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
  209. snowflake/ml/modeling/svm/linear_svc.py +246 -175
  210. snowflake/ml/modeling/svm/linear_svr.py +246 -175
  211. snowflake/ml/modeling/svm/nu_svc.py +246 -175
  212. snowflake/ml/modeling/svm/nu_svr.py +246 -175
  213. snowflake/ml/modeling/svm/svc.py +246 -175
  214. snowflake/ml/modeling/svm/svr.py +246 -175
  215. snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
  216. snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
  217. snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
  218. snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
  219. snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
  220. snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
  221. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
  222. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
  223. snowflake/ml/registry/model_registry.py +3 -149
  224. snowflake/ml/registry/registry.py +1 -1
  225. snowflake/ml/version.py +1 -1
  226. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
  227. snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
  228. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
  229. snowflake/ml/registry/_artifact_manager.py +0 -156
  230. snowflake/ml/registry/artifact.py +0 -46
  231. snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
  232. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
  233. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
  234. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
33
33
  BatchInferenceKwargsTypedDict,
34
34
  ScoreKwargsTypedDict
35
35
  )
36
+ from snowflake.ml.model._signatures import utils as model_signature_utils
37
+ from snowflake.ml.model.model_signature import (
38
+ BaseFeatureSpec,
39
+ DataType,
40
+ FeatureSpec,
41
+ ModelSignature,
42
+ _infer_signature,
43
+ _rename_signature_with_snowflake_identifiers,
44
+ )
36
45
 
37
46
  from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
38
47
 
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
43
52
  validate_sklearn_args,
44
53
  )
45
54
 
46
- from snowflake.ml.model.model_signature import (
47
- DataType,
48
- FeatureSpec,
49
- ModelSignature,
50
- _infer_signature,
51
- _rename_signature_with_snowflake_identifiers,
52
- BaseFeatureSpec,
53
- )
54
- from snowflake.ml.model._signatures import utils as model_signature_utils
55
-
56
55
  _PROJECT = "ModelDevelopment"
57
56
  # Derive subproject from module name by removing "sklearn"
58
57
  # and converting module name from underscore to CamelCase
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("s
61
60
 
62
61
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
63
62
 
64
- def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
65
- def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
66
- return False and callable(getattr(self._sklearn_object, "fit_transform", None))
67
- return check
68
-
69
-
70
63
  class PassiveAggressiveClassifier(BaseTransformer):
71
64
  r"""Passive Aggressive Classifier
72
65
  For more details on this class, see [sklearn.linear_model.PassiveAggressiveClassifier]
@@ -301,12 +294,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
301
294
  )
302
295
  return selected_cols
303
296
 
304
- @telemetry.send_api_usage_telemetry(
305
- project=_PROJECT,
306
- subproject=_SUBPROJECT,
307
- custom_tags=dict([("autogen", True)]),
308
- )
309
- def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "PassiveAggressiveClassifier":
297
+ def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "PassiveAggressiveClassifier":
310
298
  """Fit linear model with Passive Aggressive algorithm
311
299
  For more details on this function, see [sklearn.linear_model.PassiveAggressiveClassifier.fit]
312
300
  (https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.PassiveAggressiveClassifier.html#sklearn.linear_model.PassiveAggressiveClassifier.fit)
@@ -333,12 +321,14 @@ class PassiveAggressiveClassifier(BaseTransformer):
333
321
 
334
322
  self._snowpark_cols = dataset.select(self.input_cols).columns
335
323
 
336
- # If we are already in a stored procedure, no need to kick off another one.
324
+ # If we are already in a stored procedure, no need to kick off another one.
337
325
  if SNOWML_SPROC_ENV in os.environ:
338
326
  statement_params = telemetry.get_function_usage_statement_params(
339
327
  project=_PROJECT,
340
328
  subproject=_SUBPROJECT,
341
- function_name=telemetry.get_statement_params_full_func_name(inspect.currentframe(), PassiveAggressiveClassifier.__class__.__name__),
329
+ function_name=telemetry.get_statement_params_full_func_name(
330
+ inspect.currentframe(), PassiveAggressiveClassifier.__class__.__name__
331
+ ),
342
332
  api_calls=[Session.call],
343
333
  custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
344
334
  )
@@ -359,27 +349,24 @@ class PassiveAggressiveClassifier(BaseTransformer):
359
349
  )
360
350
  self._sklearn_object = model_trainer.train()
361
351
  self._is_fitted = True
362
- self._get_model_signatures(dataset)
352
+ self._generate_model_signatures(dataset)
363
353
  return self
364
354
 
365
355
  def _batch_inference_validate_snowpark(
366
356
  self,
367
357
  dataset: DataFrame,
368
358
  inference_method: str,
369
- ) -> List[str]:
370
- """Util method to run validate that batch inference can be run on a snowpark dataframe and
371
- return the available package that exists in the snowflake anaconda channel
359
+ ) -> None:
360
+ """Util method to run validate that batch inference can be run on a snowpark dataframe.
372
361
 
373
362
  Args:
374
363
  dataset: snowpark dataframe
375
364
  inference_method: the inference method such as predict, score...
376
-
365
+
377
366
  Raises:
378
367
  SnowflakeMLException: If the estimator is not fitted, raise error
379
368
  SnowflakeMLException: If the session is None, raise error
380
369
 
381
- Returns:
382
- A list of available package that exists in the snowflake anaconda channel
383
370
  """
384
371
  if not self._is_fitted:
385
372
  raise exceptions.SnowflakeMLException(
@@ -397,9 +384,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
397
384
  "Session must not specified for snowpark dataset."
398
385
  ),
399
386
  )
400
- # Validate that key package version in user workspace are supported in snowflake conda channel
401
- return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
402
- pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
387
+
403
388
 
404
389
  @available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
405
390
  @telemetry.send_api_usage_telemetry(
@@ -435,7 +420,9 @@ class PassiveAggressiveClassifier(BaseTransformer):
435
420
  # when it is classifier, infer the datatype from label columns
436
421
  if expected_type_inferred == "" and 'predict' in self.model_signatures:
437
422
  # Batch inference takes a single expected output column type. Use the first columns type for now.
438
- label_cols_signatures = [row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols]
423
+ label_cols_signatures = [
424
+ row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
425
+ ]
439
426
  if len(label_cols_signatures) == 0:
440
427
  error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
441
428
  raise exceptions.SnowflakeMLException(
@@ -443,25 +430,23 @@ class PassiveAggressiveClassifier(BaseTransformer):
443
430
  original_exception=ValueError(error_str),
444
431
  )
445
432
 
446
- expected_type_inferred = convert_sp_to_sf_type(
447
- label_cols_signatures[0].as_snowpark_type()
448
- )
433
+ expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
449
434
 
450
- self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
451
- assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
435
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
436
+ self._deps = self._get_dependencies()
437
+ assert isinstance(
438
+ dataset._session, Session
439
+ ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
452
440
 
453
441
  transform_kwargs = dict(
454
- session = dataset._session,
455
- dependencies = self._deps,
456
- drop_input_cols = self._drop_input_cols,
457
- expected_output_cols_type = expected_type_inferred,
442
+ session=dataset._session,
443
+ dependencies=self._deps,
444
+ drop_input_cols=self._drop_input_cols,
445
+ expected_output_cols_type=expected_type_inferred,
458
446
  )
459
447
 
460
448
  elif isinstance(dataset, pd.DataFrame):
461
- transform_kwargs = dict(
462
- snowpark_input_cols = self._snowpark_cols,
463
- drop_input_cols = self._drop_input_cols
464
- )
449
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
465
450
 
466
451
  transform_handlers = ModelTransformerBuilder.build(
467
452
  dataset=dataset,
@@ -501,7 +486,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
501
486
  Transformed dataset.
502
487
  """
503
488
  super()._check_dataset_type(dataset)
504
- inference_method="transform"
489
+ inference_method = "transform"
505
490
 
506
491
  # This dictionary contains optional kwargs for batch inference. These kwargs
507
492
  # are specific to the type of dataset used.
@@ -531,24 +516,19 @@ class PassiveAggressiveClassifier(BaseTransformer):
531
516
  if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
532
517
  expected_dtype = convert_sp_to_sf_type(output_types[0])
533
518
 
534
- self._deps = self._batch_inference_validate_snowpark(
535
- dataset=dataset,
536
- inference_method=inference_method,
537
- )
519
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
520
+ self._deps = self._get_dependencies()
538
521
  assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
539
522
 
540
523
  transform_kwargs = dict(
541
- session = dataset._session,
542
- dependencies = self._deps,
543
- drop_input_cols = self._drop_input_cols,
544
- expected_output_cols_type = expected_dtype,
524
+ session=dataset._session,
525
+ dependencies=self._deps,
526
+ drop_input_cols=self._drop_input_cols,
527
+ expected_output_cols_type=expected_dtype,
545
528
  )
546
529
 
547
530
  elif isinstance(dataset, pd.DataFrame):
548
- transform_kwargs = dict(
549
- snowpark_input_cols = self._snowpark_cols,
550
- drop_input_cols = self._drop_input_cols
551
- )
531
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
552
532
 
553
533
  transform_handlers = ModelTransformerBuilder.build(
554
534
  dataset=dataset,
@@ -567,7 +547,11 @@ class PassiveAggressiveClassifier(BaseTransformer):
567
547
  return output_df
568
548
 
569
549
  @available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
570
- def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_predict_",) -> Union[DataFrame, pd.DataFrame]:
550
+ def fit_predict(
551
+ self,
552
+ dataset: Union[DataFrame, pd.DataFrame],
553
+ output_cols_prefix: str = "fit_predict_",
554
+ ) -> Union[DataFrame, pd.DataFrame]:
571
555
  """ Method not supported for this class.
572
556
 
573
557
 
@@ -592,22 +576,104 @@ class PassiveAggressiveClassifier(BaseTransformer):
592
576
  )
593
577
  output_result, fitted_estimator = model_trainer.train_fit_predict(
594
578
  drop_input_cols=self._drop_input_cols,
595
- expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
579
+ expected_output_cols_list=(
580
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
581
+ ),
596
582
  )
597
583
  self._sklearn_object = fitted_estimator
598
584
  self._is_fitted = True
599
585
  return output_result
600
586
 
587
+
588
+ @available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
589
+ def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
590
+ """ Method not supported for this class.
591
+
601
592
 
602
- @available_if(_is_fit_transform_method_enabled()) # type: ignore[misc]
603
- def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[Any, npt.NDArray[Any]]:
604
- """
593
+ Raises:
594
+ TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
595
+
596
+ Args:
597
+ dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
598
+ Snowpark or Pandas DataFrame.
599
+ output_cols_prefix: Prefix for the response columns
605
600
  Returns:
606
601
  Transformed dataset.
607
602
  """
608
- self.fit(dataset)
609
- assert self._sklearn_object is not None
610
- return self._sklearn_object.embedding_
603
+ self._infer_input_output_cols(dataset)
604
+ super()._check_dataset_type(dataset)
605
+ model_trainer = ModelTrainerBuilder.build_fit_transform(
606
+ estimator=self._sklearn_object,
607
+ dataset=dataset,
608
+ input_cols=self.input_cols,
609
+ label_cols=self.label_cols,
610
+ sample_weight_col=self.sample_weight_col,
611
+ autogenerated=self._autogenerated,
612
+ subproject=_SUBPROJECT,
613
+ )
614
+ output_result, fitted_estimator = model_trainer.train_fit_transform(
615
+ drop_input_cols=self._drop_input_cols,
616
+ expected_output_cols_list=self.output_cols,
617
+ )
618
+ self._sklearn_object = fitted_estimator
619
+ self._is_fitted = True
620
+ return output_result
621
+
622
+
623
+ def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
624
+ """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
625
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
626
+ """
627
+ output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
628
+ # The following condition is introduced for kneighbors methods, and not used in other methods
629
+ if output_cols:
630
+ output_cols = [
631
+ identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
632
+ for c in output_cols
633
+ ]
634
+ elif getattr(self._sklearn_object, "classes_", None) is None:
635
+ output_cols = [output_cols_prefix]
636
+ elif self._sklearn_object is not None:
637
+ classes = self._sklearn_object.classes_
638
+ if isinstance(classes, numpy.ndarray):
639
+ output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
640
+ elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
641
+ # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
642
+ output_cols = []
643
+ for i, cl in enumerate(classes):
644
+ # For binary classification, there is only one output column for each class
645
+ # ndarray as the two classes are complementary.
646
+ if len(cl) == 2:
647
+ output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
648
+ else:
649
+ output_cols.extend([
650
+ f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
651
+ ])
652
+ else:
653
+ output_cols = []
654
+
655
+ # Make sure column names are valid snowflake identifiers.
656
+ assert output_cols is not None # Make MyPy happy
657
+ rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
658
+
659
+ return rv
660
+
661
+ def _align_expected_output_names(
662
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
663
+ ) -> List[str]:
664
+ # in case the inferred output column names dimension is different
665
+ # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
666
+ output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
667
+ output_df_columns = list(output_df_pd.columns)
668
+ output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
669
+ if self.sample_weight_col:
670
+ output_df_columns_set -= set(self.sample_weight_col)
671
+ # if the dimension of inferred output column names is correct; use it
672
+ if len(expected_output_cols_list) == len(output_df_columns_set):
673
+ return expected_output_cols_list
674
+ # otherwise, use the sklearn estimator's output
675
+ else:
676
+ return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
611
677
 
612
678
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
613
679
  @telemetry.send_api_usage_telemetry(
@@ -639,24 +705,26 @@ class PassiveAggressiveClassifier(BaseTransformer):
639
705
  # are specific to the type of dataset used.
640
706
  transform_kwargs: BatchInferenceKwargsTypedDict = dict()
641
707
 
708
+ expected_output_cols = self._get_output_column_names(output_cols_prefix)
709
+
642
710
  if isinstance(dataset, DataFrame):
643
- self._deps = self._batch_inference_validate_snowpark(
644
- dataset=dataset,
645
- inference_method=inference_method,
646
- )
647
- assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
711
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
712
+ self._deps = self._get_dependencies()
713
+ assert isinstance(
714
+ dataset._session, Session
715
+ ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
648
716
  transform_kwargs = dict(
649
717
  session=dataset._session,
650
718
  dependencies=self._deps,
651
- drop_input_cols = self._drop_input_cols,
719
+ drop_input_cols=self._drop_input_cols,
652
720
  expected_output_cols_type="float",
653
721
  )
722
+ expected_output_cols = self._align_expected_output_names(
723
+ inference_method, dataset, expected_output_cols, output_cols_prefix
724
+ )
654
725
 
655
726
  elif isinstance(dataset, pd.DataFrame):
656
- transform_kwargs = dict(
657
- snowpark_input_cols = self._snowpark_cols,
658
- drop_input_cols = self._drop_input_cols
659
- )
727
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
660
728
 
661
729
  transform_handlers = ModelTransformerBuilder.build(
662
730
  dataset=dataset,
@@ -668,7 +736,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
668
736
  output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
669
737
  inference_method=inference_method,
670
738
  input_cols=self.input_cols,
671
- expected_output_cols=self._get_output_column_names(output_cols_prefix),
739
+ expected_output_cols=expected_output_cols,
672
740
  **transform_kwargs
673
741
  )
674
742
  return output_df
@@ -698,29 +766,30 @@ class PassiveAggressiveClassifier(BaseTransformer):
698
766
  Output dataset with log probability of the sample for each class in the model.
699
767
  """
700
768
  super()._check_dataset_type(dataset)
701
- inference_method="predict_log_proba"
769
+ inference_method = "predict_log_proba"
770
+ expected_output_cols = self._get_output_column_names(output_cols_prefix)
702
771
 
703
772
  # This dictionary contains optional kwargs for batch inference. These kwargs
704
773
  # are specific to the type of dataset used.
705
774
  transform_kwargs: BatchInferenceKwargsTypedDict = dict()
706
775
 
707
776
  if isinstance(dataset, DataFrame):
708
- self._deps = self._batch_inference_validate_snowpark(
709
- dataset=dataset,
710
- inference_method=inference_method,
711
- )
712
- assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
777
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
778
+ self._deps = self._get_dependencies()
779
+ assert isinstance(
780
+ dataset._session, Session
781
+ ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
713
782
  transform_kwargs = dict(
714
783
  session=dataset._session,
715
784
  dependencies=self._deps,
716
- drop_input_cols = self._drop_input_cols,
785
+ drop_input_cols=self._drop_input_cols,
717
786
  expected_output_cols_type="float",
718
787
  )
788
+ expected_output_cols = self._align_expected_output_names(
789
+ inference_method, dataset, expected_output_cols, output_cols_prefix
790
+ )
719
791
  elif isinstance(dataset, pd.DataFrame):
720
- transform_kwargs = dict(
721
- snowpark_input_cols = self._snowpark_cols,
722
- drop_input_cols = self._drop_input_cols
723
- )
792
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
724
793
 
725
794
  transform_handlers = ModelTransformerBuilder.build(
726
795
  dataset=dataset,
@@ -733,7 +802,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
733
802
  output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
734
803
  inference_method=inference_method,
735
804
  input_cols=self.input_cols,
736
- expected_output_cols=self._get_output_column_names(output_cols_prefix),
805
+ expected_output_cols=expected_output_cols,
737
806
  **transform_kwargs
738
807
  )
739
808
  return output_df
@@ -761,30 +830,32 @@ class PassiveAggressiveClassifier(BaseTransformer):
761
830
  Output dataset with results of the decision function for the samples in input dataset.
762
831
  """
763
832
  super()._check_dataset_type(dataset)
764
- inference_method="decision_function"
833
+ inference_method = "decision_function"
765
834
 
766
835
  # This dictionary contains optional kwargs for batch inference. These kwargs
767
836
  # are specific to the type of dataset used.
768
837
  transform_kwargs: BatchInferenceKwargsTypedDict = dict()
769
838
 
839
+ expected_output_cols = self._get_output_column_names(output_cols_prefix)
840
+
770
841
  if isinstance(dataset, DataFrame):
771
- self._deps = self._batch_inference_validate_snowpark(
772
- dataset=dataset,
773
- inference_method=inference_method,
774
- )
775
- assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
842
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
843
+ self._deps = self._get_dependencies()
844
+ assert isinstance(
845
+ dataset._session, Session
846
+ ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
776
847
  transform_kwargs = dict(
777
848
  session=dataset._session,
778
849
  dependencies=self._deps,
779
- drop_input_cols = self._drop_input_cols,
850
+ drop_input_cols=self._drop_input_cols,
780
851
  expected_output_cols_type="float",
781
852
  )
853
+ expected_output_cols = self._align_expected_output_names(
854
+ inference_method, dataset, expected_output_cols, output_cols_prefix
855
+ )
782
856
 
783
857
  elif isinstance(dataset, pd.DataFrame):
784
- transform_kwargs = dict(
785
- snowpark_input_cols = self._snowpark_cols,
786
- drop_input_cols = self._drop_input_cols
787
- )
858
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
788
859
 
789
860
  transform_handlers = ModelTransformerBuilder.build(
790
861
  dataset=dataset,
@@ -797,7 +868,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
797
868
  output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
798
869
  inference_method=inference_method,
799
870
  input_cols=self.input_cols,
800
- expected_output_cols=self._get_output_column_names(output_cols_prefix),
871
+ expected_output_cols=expected_output_cols,
801
872
  **transform_kwargs
802
873
  )
803
874
  return output_df
@@ -826,17 +897,17 @@ class PassiveAggressiveClassifier(BaseTransformer):
826
897
  Output dataset with probability of the sample for each class in the model.
827
898
  """
828
899
  super()._check_dataset_type(dataset)
829
- inference_method="score_samples"
900
+ inference_method = "score_samples"
830
901
 
831
902
  # This dictionary contains optional kwargs for batch inference. These kwargs
832
903
  # are specific to the type of dataset used.
833
904
  transform_kwargs: BatchInferenceKwargsTypedDict = dict()
834
905
 
906
+ expected_output_cols = self._get_output_column_names(output_cols_prefix)
907
+
835
908
  if isinstance(dataset, DataFrame):
836
- self._deps = self._batch_inference_validate_snowpark(
837
- dataset=dataset,
838
- inference_method=inference_method,
839
- )
909
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
910
+ self._deps = self._get_dependencies()
840
911
  assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
841
912
  transform_kwargs = dict(
842
913
  session=dataset._session,
@@ -844,6 +915,9 @@ class PassiveAggressiveClassifier(BaseTransformer):
844
915
  drop_input_cols = self._drop_input_cols,
845
916
  expected_output_cols_type="float",
846
917
  )
918
+ expected_output_cols = self._align_expected_output_names(
919
+ inference_method, dataset, expected_output_cols, output_cols_prefix
920
+ )
847
921
 
848
922
  elif isinstance(dataset, pd.DataFrame):
849
923
  transform_kwargs = dict(
@@ -862,7 +936,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
862
936
  output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
863
937
  inference_method=inference_method,
864
938
  input_cols=self.input_cols,
865
- expected_output_cols=self._get_output_column_names(output_cols_prefix),
939
+ expected_output_cols=expected_output_cols,
866
940
  **transform_kwargs
867
941
  )
868
942
  return output_df
@@ -897,17 +971,15 @@ class PassiveAggressiveClassifier(BaseTransformer):
897
971
  transform_kwargs: ScoreKwargsTypedDict = dict()
898
972
 
899
973
  if isinstance(dataset, DataFrame):
900
- self._deps = self._batch_inference_validate_snowpark(
901
- dataset=dataset,
902
- inference_method="score",
903
- )
974
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
975
+ self._deps = self._get_dependencies()
904
976
  selected_cols = self._get_active_columns()
905
977
  if len(selected_cols) > 0:
906
978
  dataset = dataset.select(selected_cols)
907
979
  assert isinstance(dataset._session, Session) # keep mypy happy
908
980
  transform_kwargs = dict(
909
981
  session=dataset._session,
910
- dependencies=["snowflake-snowpark-python"] + self._deps,
982
+ dependencies=self._deps,
911
983
  score_sproc_imports=['sklearn'],
912
984
  )
913
985
  elif isinstance(dataset, pd.DataFrame):
@@ -972,11 +1044,8 @@ class PassiveAggressiveClassifier(BaseTransformer):
972
1044
 
973
1045
  if isinstance(dataset, DataFrame):
974
1046
 
975
- self._deps = self._batch_inference_validate_snowpark(
976
- dataset=dataset,
977
- inference_method=inference_method,
978
-
979
- )
1047
+ self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
1048
+ self._deps = self._get_dependencies()
980
1049
  assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
981
1050
  transform_kwargs = dict(
982
1051
  session = dataset._session,
@@ -1009,50 +1078,84 @@ class PassiveAggressiveClassifier(BaseTransformer):
1009
1078
  )
1010
1079
  return output_df
1011
1080
 
1081
+
1082
+
1083
+ def to_sklearn(self) -> Any:
1084
+ """Get sklearn.linear_model.PassiveAggressiveClassifier object.
1085
+ """
1086
+ if self._sklearn_object is None:
1087
+ self._sklearn_object = self._create_sklearn_object()
1088
+ return self._sklearn_object
1089
+
1090
+ def to_xgboost(self) -> Any:
1091
+ raise exceptions.SnowflakeMLException(
1092
+ error_code=error_codes.METHOD_NOT_ALLOWED,
1093
+ original_exception=AttributeError(
1094
+ modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
1095
+ "to_xgboost()",
1096
+ "to_sklearn()"
1097
+ )
1098
+ ),
1099
+ )
1100
+
1101
+ def to_lightgbm(self) -> Any:
1102
+ raise exceptions.SnowflakeMLException(
1103
+ error_code=error_codes.METHOD_NOT_ALLOWED,
1104
+ original_exception=AttributeError(
1105
+ modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
1106
+ "to_lightgbm()",
1107
+ "to_sklearn()"
1108
+ )
1109
+ ),
1110
+ )
1111
+
1112
+ def _get_dependencies(self) -> List[str]:
1113
+ return self._deps
1114
+
1012
1115
 
1013
- def _get_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
1116
+ def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
1014
1117
  self._model_signature_dict = dict()
1015
1118
 
1016
1119
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1017
1120
 
1018
- inputs = list(_infer_signature(dataset[self.input_cols], "input"))
1121
+ inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1019
1122
  outputs: List[BaseFeatureSpec] = []
1020
1123
  if hasattr(self, "predict"):
1021
1124
  # keep mypy happy
1022
- assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
1125
+ assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
1023
1126
  # For classifier, the type of predict is the same as the type of label
1024
- if self._sklearn_object._estimator_type == 'classifier':
1025
- # label columns is the desired type for output
1127
+ if self._sklearn_object._estimator_type == "classifier":
1128
+ # label columns is the desired type for output
1026
1129
  outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1027
1130
  # rename the output columns
1028
1131
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1029
- self._model_signature_dict["predict"] = ModelSignature(inputs,
1030
- ([] if self._drop_input_cols else inputs)
1031
- + outputs)
1132
+ self._model_signature_dict["predict"] = ModelSignature(
1133
+ inputs, ([] if self._drop_input_cols else inputs) + outputs
1134
+ )
1032
1135
  # For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
1033
1136
  # For outlier models, returns -1 for outliers and 1 for inliers.
1034
- # Clusterer returns int64 cluster labels.
1137
+ # Clusterer returns int64 cluster labels.
1035
1138
  elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
1036
1139
  outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
1037
- self._model_signature_dict["predict"] = ModelSignature(inputs,
1038
- ([] if self._drop_input_cols else inputs)
1039
- + outputs)
1040
-
1140
+ self._model_signature_dict["predict"] = ModelSignature(
1141
+ inputs, ([] if self._drop_input_cols else inputs) + outputs
1142
+ )
1143
+
1041
1144
  # For regressor, the type of predict is float64
1042
- elif self._sklearn_object._estimator_type == 'regressor':
1145
+ elif self._sklearn_object._estimator_type == "regressor":
1043
1146
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1044
- self._model_signature_dict["predict"] = ModelSignature(inputs,
1045
- ([] if self._drop_input_cols else inputs)
1046
- + outputs)
1047
-
1147
+ self._model_signature_dict["predict"] = ModelSignature(
1148
+ inputs, ([] if self._drop_input_cols else inputs) + outputs
1149
+ )
1150
+
1048
1151
  for prob_func in PROB_FUNCTIONS:
1049
1152
  if hasattr(self, prob_func):
1050
1153
  output_cols_prefix: str = f"{prob_func}_"
1051
1154
  output_column_names = self._get_output_column_names(output_cols_prefix)
1052
1155
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1053
- self._model_signature_dict[prob_func] = ModelSignature(inputs,
1054
- ([] if self._drop_input_cols else inputs)
1055
- + outputs)
1156
+ self._model_signature_dict[prob_func] = ModelSignature(
1157
+ inputs, ([] if self._drop_input_cols else inputs) + outputs
1158
+ )
1056
1159
 
1057
1160
  # Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
1058
1161
  items = list(self._model_signature_dict.items())
@@ -1065,10 +1168,10 @@ class PassiveAggressiveClassifier(BaseTransformer):
1065
1168
  """Returns model signature of current class.
1066
1169
 
1067
1170
  Raises:
1068
- exceptions.SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
1171
+ SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
1069
1172
 
1070
1173
  Returns:
1071
- Dict[str, ModelSignature]: each method and its input output signature
1174
+ Dict with each method and its input output signature
1072
1175
  """
1073
1176
  if self._model_signature_dict is None:
1074
1177
  raise exceptions.SnowflakeMLException(
@@ -1076,35 +1179,3 @@ class PassiveAggressiveClassifier(BaseTransformer):
1076
1179
  original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
1077
1180
  )
1078
1181
  return self._model_signature_dict
1079
-
1080
- def to_sklearn(self) -> Any:
1081
- """Get sklearn.linear_model.PassiveAggressiveClassifier object.
1082
- """
1083
- if self._sklearn_object is None:
1084
- self._sklearn_object = self._create_sklearn_object()
1085
- return self._sklearn_object
1086
-
1087
- def to_xgboost(self) -> Any:
1088
- raise exceptions.SnowflakeMLException(
1089
- error_code=error_codes.METHOD_NOT_ALLOWED,
1090
- original_exception=AttributeError(
1091
- modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
1092
- "to_xgboost()",
1093
- "to_sklearn()"
1094
- )
1095
- ),
1096
- )
1097
-
1098
- def to_lightgbm(self) -> Any:
1099
- raise exceptions.SnowflakeMLException(
1100
- error_code=error_codes.METHOD_NOT_ALLOWED,
1101
- original_exception=AttributeError(
1102
- modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
1103
- "to_lightgbm()",
1104
- "to_sklearn()"
1105
- )
1106
- ),
1107
- )
1108
-
1109
- def _get_dependencies(self) -> List[str]:
1110
- return self._deps