snowflake-ml-python 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. snowflake/ml/_internal/env_utils.py +11 -1
  2. snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
  3. snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
  4. snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
  5. snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
  6. snowflake/ml/_internal/utils/formatting.py +1 -1
  7. snowflake/ml/_internal/utils/identifier.py +3 -1
  8. snowflake/ml/_internal/utils/sql_identifier.py +2 -6
  9. snowflake/ml/feature_store/feature_store.py +166 -184
  10. snowflake/ml/feature_store/feature_view.py +12 -24
  11. snowflake/ml/fileset/sfcfs.py +56 -50
  12. snowflake/ml/fileset/stage_fs.py +48 -13
  13. snowflake/ml/model/_client/model/model_version_impl.py +6 -49
  14. snowflake/ml/model/_client/ops/model_ops.py +78 -29
  15. snowflake/ml/model/_client/sql/model.py +23 -2
  16. snowflake/ml/model/_client/sql/model_version.py +22 -1
  17. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +1 -3
  18. snowflake/ml/model/_deploy_client/snowservice/deploy.py +5 -2
  19. snowflake/ml/model/_model_composer/model_composer.py +7 -5
  20. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
  21. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
  22. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
  23. snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
  24. snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
  25. snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
  26. snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
  27. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +13 -1
  28. snowflake/ml/model/_packager/model_handlers/xgboost.py +1 -1
  29. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  30. snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
  31. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
  32. snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
  33. snowflake/ml/model/_packager/model_packager.py +2 -2
  34. snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
  35. snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
  36. snowflake/ml/model/custom_model.py +3 -1
  37. snowflake/ml/model/type_hints.py +21 -2
  38. snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
  39. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
  40. snowflake/ml/modeling/_internal/model_specifications.py +3 -1
  41. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +545 -0
  42. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +8 -5
  43. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
  44. snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
  45. snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
  46. snowflake/ml/modeling/cluster/birch.py +195 -123
  47. snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
  48. snowflake/ml/modeling/cluster/dbscan.py +195 -123
  49. snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
  50. snowflake/ml/modeling/cluster/k_means.py +195 -123
  51. snowflake/ml/modeling/cluster/mean_shift.py +195 -123
  52. snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
  53. snowflake/ml/modeling/cluster/optics.py +195 -123
  54. snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
  55. snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
  56. snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
  57. snowflake/ml/modeling/compose/column_transformer.py +195 -123
  58. snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
  59. snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
  60. snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
  61. snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
  62. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
  63. snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
  64. snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
  65. snowflake/ml/modeling/covariance/oas.py +195 -123
  66. snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
  67. snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
  68. snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
  69. snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
  70. snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
  71. snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
  72. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
  73. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
  74. snowflake/ml/modeling/decomposition/pca.py +195 -123
  75. snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
  76. snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
  77. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
  78. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
  79. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
  80. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
  81. snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
  82. snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
  83. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
  84. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
  85. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
  86. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
  87. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
  88. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
  89. snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
  90. snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
  91. snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
  92. snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
  93. snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
  94. snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
  95. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
  96. snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
  97. snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
  98. snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
  99. snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
  100. snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
  101. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
  102. snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
  103. snowflake/ml/modeling/framework/_utils.py +8 -1
  104. snowflake/ml/modeling/framework/base.py +24 -6
  105. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
  106. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
  107. snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
  108. snowflake/ml/modeling/impute/knn_imputer.py +195 -123
  109. snowflake/ml/modeling/impute/missing_indicator.py +195 -123
  110. snowflake/ml/modeling/impute/simple_imputer.py +4 -15
  111. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
  112. snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
  113. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
  114. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
  115. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
  116. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
  117. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +198 -125
  118. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +198 -125
  119. snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
  120. snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
  121. snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
  122. snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
  123. snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
  124. snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
  125. snowflake/ml/modeling/linear_model/lars.py +195 -123
  126. snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
  127. snowflake/ml/modeling/linear_model/lasso.py +195 -123
  128. snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
  129. snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
  130. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
  131. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
  132. snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
  133. snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
  134. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
  135. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
  136. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
  137. snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
  138. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
  139. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
  140. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
  141. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
  142. snowflake/ml/modeling/linear_model/perceptron.py +195 -123
  143. snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
  144. snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
  145. snowflake/ml/modeling/linear_model/ridge.py +195 -123
  146. snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
  147. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
  148. snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
  149. snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
  150. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
  151. snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
  152. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
  153. snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
  154. snowflake/ml/modeling/manifold/isomap.py +195 -123
  155. snowflake/ml/modeling/manifold/mds.py +195 -123
  156. snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
  157. snowflake/ml/modeling/manifold/tsne.py +195 -123
  158. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
  159. snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
  160. snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
  161. snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
  162. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
  163. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
  164. snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
  165. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
  166. snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
  167. snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
  168. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
  169. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
  170. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
  171. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
  172. snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
  173. snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
  174. snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
  175. snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
  176. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
  177. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
  178. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
  179. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
  180. snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
  181. snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
  182. snowflake/ml/modeling/pipeline/pipeline.py +4 -4
  183. snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
  184. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
  185. snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
  186. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
  187. snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
  188. snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
  189. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
  190. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
  191. snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
  192. snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
  193. snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
  194. snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
  195. snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
  196. snowflake/ml/modeling/svm/linear_svc.py +195 -123
  197. snowflake/ml/modeling/svm/linear_svr.py +195 -123
  198. snowflake/ml/modeling/svm/nu_svc.py +195 -123
  199. snowflake/ml/modeling/svm/nu_svr.py +195 -123
  200. snowflake/ml/modeling/svm/svc.py +195 -123
  201. snowflake/ml/modeling/svm/svr.py +195 -123
  202. snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
  203. snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
  204. snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
  205. snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
  206. snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
  207. snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
  208. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
  209. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
  210. snowflake/ml/registry/_manager/model_manager.py +5 -1
  211. snowflake/ml/registry/model_registry.py +99 -26
  212. snowflake/ml/registry/registry.py +3 -2
  213. snowflake/ml/version.py +1 -1
  214. {snowflake_ml_python-1.3.1.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +94 -55
  215. {snowflake_ml_python-1.3.1.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +218 -212
  216. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
  217. {snowflake_ml_python-1.3.1.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
  218. {snowflake_ml_python-1.3.1.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
  219. {snowflake_ml_python-1.3.1.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
33
33
  BatchInferenceKwargsTypedDict,
34
34
  ScoreKwargsTypedDict
35
35
  )
36
+ from snowflake.ml.model._signatures import utils as model_signature_utils
37
+ from snowflake.ml.model.model_signature import (
38
+ BaseFeatureSpec,
39
+ DataType,
40
+ FeatureSpec,
41
+ ModelSignature,
42
+ _infer_signature,
43
+ _rename_signature_with_snowflake_identifiers,
44
+ )
36
45
 
37
46
  from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
38
47
 
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
43
52
  validate_sklearn_args,
44
53
  )
45
54
 
46
- from snowflake.ml.model.model_signature import (
47
- DataType,
48
- FeatureSpec,
49
- ModelSignature,
50
- _infer_signature,
51
- _rename_signature_with_snowflake_identifiers,
52
- BaseFeatureSpec,
53
- )
54
- from snowflake.ml.model._signatures import utils as model_signature_utils
55
-
56
55
  _PROJECT = "ModelDevelopment"
57
56
  # Derive subproject from module name by removing "sklearn"
58
57
  # and converting module name from underscore to CamelCase
@@ -291,12 +290,7 @@ class GaussianProcessClassifier(BaseTransformer):
291
290
  )
292
291
  return selected_cols
293
292
 
294
- @telemetry.send_api_usage_telemetry(
295
- project=_PROJECT,
296
- subproject=_SUBPROJECT,
297
- custom_tags=dict([("autogen", True)]),
298
- )
299
- def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GaussianProcessClassifier":
293
+ def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GaussianProcessClassifier":
300
294
  """Fit Gaussian process classification model
301
295
  For more details on this function, see [sklearn.gaussian_process.GaussianProcessClassifier.fit]
302
296
  (https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessClassifier.html#sklearn.gaussian_process.GaussianProcessClassifier.fit)
@@ -323,12 +317,14 @@ class GaussianProcessClassifier(BaseTransformer):
323
317
 
324
318
  self._snowpark_cols = dataset.select(self.input_cols).columns
325
319
 
326
- # If we are already in a stored procedure, no need to kick off another one.
320
+ # If we are already in a stored procedure, no need to kick off another one.
327
321
  if SNOWML_SPROC_ENV in os.environ:
328
322
  statement_params = telemetry.get_function_usage_statement_params(
329
323
  project=_PROJECT,
330
324
  subproject=_SUBPROJECT,
331
- function_name=telemetry.get_statement_params_full_func_name(inspect.currentframe(), GaussianProcessClassifier.__class__.__name__),
325
+ function_name=telemetry.get_statement_params_full_func_name(
326
+ inspect.currentframe(), GaussianProcessClassifier.__class__.__name__
327
+ ),
332
328
  api_calls=[Session.call],
333
329
  custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
334
330
  )
@@ -349,7 +345,7 @@ class GaussianProcessClassifier(BaseTransformer):
349
345
  )
350
346
  self._sklearn_object = model_trainer.train()
351
347
  self._is_fitted = True
352
- self._get_model_signatures(dataset)
348
+ self._generate_model_signatures(dataset)
353
349
  return self
354
350
 
355
351
  def _batch_inference_validate_snowpark(
@@ -425,7 +421,9 @@ class GaussianProcessClassifier(BaseTransformer):
425
421
  # when it is classifier, infer the datatype from label columns
426
422
  if expected_type_inferred == "" and 'predict' in self.model_signatures:
427
423
  # Batch inference takes a single expected output column type. Use the first columns type for now.
428
- label_cols_signatures = [row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols]
424
+ label_cols_signatures = [
425
+ row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
426
+ ]
429
427
  if len(label_cols_signatures) == 0:
430
428
  error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
431
429
  raise exceptions.SnowflakeMLException(
@@ -433,25 +431,22 @@ class GaussianProcessClassifier(BaseTransformer):
433
431
  original_exception=ValueError(error_str),
434
432
  )
435
433
 
436
- expected_type_inferred = convert_sp_to_sf_type(
437
- label_cols_signatures[0].as_snowpark_type()
438
- )
434
+ expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
439
435
 
440
436
  self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
441
- assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
437
+ assert isinstance(
438
+ dataset._session, Session
439
+ ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
442
440
 
443
441
  transform_kwargs = dict(
444
- session = dataset._session,
445
- dependencies = self._deps,
446
- drop_input_cols = self._drop_input_cols,
447
- expected_output_cols_type = expected_type_inferred,
442
+ session=dataset._session,
443
+ dependencies=self._deps,
444
+ drop_input_cols=self._drop_input_cols,
445
+ expected_output_cols_type=expected_type_inferred,
448
446
  )
449
447
 
450
448
  elif isinstance(dataset, pd.DataFrame):
451
- transform_kwargs = dict(
452
- snowpark_input_cols = self._snowpark_cols,
453
- drop_input_cols = self._drop_input_cols
454
- )
449
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
455
450
 
456
451
  transform_handlers = ModelTransformerBuilder.build(
457
452
  dataset=dataset,
@@ -491,7 +486,7 @@ class GaussianProcessClassifier(BaseTransformer):
491
486
  Transformed dataset.
492
487
  """
493
488
  super()._check_dataset_type(dataset)
494
- inference_method="transform"
489
+ inference_method = "transform"
495
490
 
496
491
  # This dictionary contains optional kwargs for batch inference. These kwargs
497
492
  # are specific to the type of dataset used.
@@ -528,17 +523,14 @@ class GaussianProcessClassifier(BaseTransformer):
528
523
  assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
529
524
 
530
525
  transform_kwargs = dict(
531
- session = dataset._session,
532
- dependencies = self._deps,
533
- drop_input_cols = self._drop_input_cols,
534
- expected_output_cols_type = expected_dtype,
526
+ session=dataset._session,
527
+ dependencies=self._deps,
528
+ drop_input_cols=self._drop_input_cols,
529
+ expected_output_cols_type=expected_dtype,
535
530
  )
536
531
 
537
532
  elif isinstance(dataset, pd.DataFrame):
538
- transform_kwargs = dict(
539
- snowpark_input_cols = self._snowpark_cols,
540
- drop_input_cols = self._drop_input_cols
541
- )
533
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
542
534
 
543
535
  transform_handlers = ModelTransformerBuilder.build(
544
536
  dataset=dataset,
@@ -557,7 +549,11 @@ class GaussianProcessClassifier(BaseTransformer):
557
549
  return output_df
558
550
 
559
551
  @available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
560
- def fit_predict(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_predict_",) -> Union[DataFrame, pd.DataFrame]:
552
+ def fit_predict(
553
+ self,
554
+ dataset: Union[DataFrame, pd.DataFrame],
555
+ output_cols_prefix: str = "fit_predict_",
556
+ ) -> Union[DataFrame, pd.DataFrame]:
561
557
  """ Method not supported for this class.
562
558
 
563
559
 
@@ -582,7 +578,9 @@ class GaussianProcessClassifier(BaseTransformer):
582
578
  )
583
579
  output_result, fitted_estimator = model_trainer.train_fit_predict(
584
580
  drop_input_cols=self._drop_input_cols,
585
- expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
581
+ expected_output_cols_list=(
582
+ self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
583
+ ),
586
584
  )
587
585
  self._sklearn_object = fitted_estimator
588
586
  self._is_fitted = True
@@ -599,6 +597,62 @@ class GaussianProcessClassifier(BaseTransformer):
599
597
  assert self._sklearn_object is not None
600
598
  return self._sklearn_object.embedding_
601
599
 
600
+
601
+ def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
602
+ """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
603
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
604
+ """
605
+ output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
606
+ # The following condition is introduced for kneighbors methods, and not used in other methods
607
+ if output_cols:
608
+ output_cols = [
609
+ identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
610
+ for c in output_cols
611
+ ]
612
+ elif getattr(self._sklearn_object, "classes_", None) is None:
613
+ output_cols = [output_cols_prefix]
614
+ elif self._sklearn_object is not None:
615
+ classes = self._sklearn_object.classes_
616
+ if isinstance(classes, numpy.ndarray):
617
+ output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
618
+ elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
619
+ # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
620
+ output_cols = []
621
+ for i, cl in enumerate(classes):
622
+ # For binary classification, there is only one output column for each class
623
+ # ndarray as the two classes are complementary.
624
+ if len(cl) == 2:
625
+ output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
626
+ else:
627
+ output_cols.extend([
628
+ f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
629
+ ])
630
+ else:
631
+ output_cols = []
632
+
633
+ # Make sure column names are valid snowflake identifiers.
634
+ assert output_cols is not None # Make MyPy happy
635
+ rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
636
+
637
+ return rv
638
+
639
+ def _align_expected_output_names(
640
+ self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
641
+ ) -> List[str]:
642
+ # in case the inferred output column names dimension is different
643
+ # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
644
+ output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
645
+ output_df_columns = list(output_df_pd.columns)
646
+ output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
647
+ if self.sample_weight_col:
648
+ output_df_columns_set -= set(self.sample_weight_col)
649
+ # if the dimension of inferred output column names is correct; use it
650
+ if len(expected_output_cols_list) == len(output_df_columns_set):
651
+ return expected_output_cols_list
652
+ # otherwise, use the sklearn estimator's output
653
+ else:
654
+ return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
655
+
602
656
  @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
603
657
  @telemetry.send_api_usage_telemetry(
604
658
  project=_PROJECT,
@@ -631,24 +685,28 @@ class GaussianProcessClassifier(BaseTransformer):
631
685
  # are specific to the type of dataset used.
632
686
  transform_kwargs: BatchInferenceKwargsTypedDict = dict()
633
687
 
688
+ expected_output_cols = self._get_output_column_names(output_cols_prefix)
689
+
634
690
  if isinstance(dataset, DataFrame):
635
691
  self._deps = self._batch_inference_validate_snowpark(
636
692
  dataset=dataset,
637
693
  inference_method=inference_method,
638
694
  )
639
- assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
695
+ assert isinstance(
696
+ dataset._session, Session
697
+ ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
640
698
  transform_kwargs = dict(
641
699
  session=dataset._session,
642
700
  dependencies=self._deps,
643
- drop_input_cols = self._drop_input_cols,
701
+ drop_input_cols=self._drop_input_cols,
644
702
  expected_output_cols_type="float",
645
703
  )
704
+ expected_output_cols = self._align_expected_output_names(
705
+ inference_method, dataset, expected_output_cols, output_cols_prefix
706
+ )
646
707
 
647
708
  elif isinstance(dataset, pd.DataFrame):
648
- transform_kwargs = dict(
649
- snowpark_input_cols = self._snowpark_cols,
650
- drop_input_cols = self._drop_input_cols
651
- )
709
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
652
710
 
653
711
  transform_handlers = ModelTransformerBuilder.build(
654
712
  dataset=dataset,
@@ -660,7 +718,7 @@ class GaussianProcessClassifier(BaseTransformer):
660
718
  output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
661
719
  inference_method=inference_method,
662
720
  input_cols=self.input_cols,
663
- expected_output_cols=self._get_output_column_names(output_cols_prefix),
721
+ expected_output_cols=expected_output_cols,
664
722
  **transform_kwargs
665
723
  )
666
724
  return output_df
@@ -692,7 +750,8 @@ class GaussianProcessClassifier(BaseTransformer):
692
750
  Output dataset with log probability of the sample for each class in the model.
693
751
  """
694
752
  super()._check_dataset_type(dataset)
695
- inference_method="predict_log_proba"
753
+ inference_method = "predict_log_proba"
754
+ expected_output_cols = self._get_output_column_names(output_cols_prefix)
696
755
 
697
756
  # This dictionary contains optional kwargs for batch inference. These kwargs
698
757
  # are specific to the type of dataset used.
@@ -703,18 +762,20 @@ class GaussianProcessClassifier(BaseTransformer):
703
762
  dataset=dataset,
704
763
  inference_method=inference_method,
705
764
  )
706
- assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
765
+ assert isinstance(
766
+ dataset._session, Session
767
+ ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
707
768
  transform_kwargs = dict(
708
769
  session=dataset._session,
709
770
  dependencies=self._deps,
710
- drop_input_cols = self._drop_input_cols,
771
+ drop_input_cols=self._drop_input_cols,
711
772
  expected_output_cols_type="float",
712
773
  )
774
+ expected_output_cols = self._align_expected_output_names(
775
+ inference_method, dataset, expected_output_cols, output_cols_prefix
776
+ )
713
777
  elif isinstance(dataset, pd.DataFrame):
714
- transform_kwargs = dict(
715
- snowpark_input_cols = self._snowpark_cols,
716
- drop_input_cols = self._drop_input_cols
717
- )
778
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
718
779
 
719
780
  transform_handlers = ModelTransformerBuilder.build(
720
781
  dataset=dataset,
@@ -727,7 +788,7 @@ class GaussianProcessClassifier(BaseTransformer):
727
788
  output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
728
789
  inference_method=inference_method,
729
790
  input_cols=self.input_cols,
730
- expected_output_cols=self._get_output_column_names(output_cols_prefix),
791
+ expected_output_cols=expected_output_cols,
731
792
  **transform_kwargs
732
793
  )
733
794
  return output_df
@@ -753,30 +814,34 @@ class GaussianProcessClassifier(BaseTransformer):
753
814
  Output dataset with results of the decision function for the samples in input dataset.
754
815
  """
755
816
  super()._check_dataset_type(dataset)
756
- inference_method="decision_function"
817
+ inference_method = "decision_function"
757
818
 
758
819
  # This dictionary contains optional kwargs for batch inference. These kwargs
759
820
  # are specific to the type of dataset used.
760
821
  transform_kwargs: BatchInferenceKwargsTypedDict = dict()
761
822
 
823
+ expected_output_cols = self._get_output_column_names(output_cols_prefix)
824
+
762
825
  if isinstance(dataset, DataFrame):
763
826
  self._deps = self._batch_inference_validate_snowpark(
764
827
  dataset=dataset,
765
828
  inference_method=inference_method,
766
829
  )
767
- assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
830
+ assert isinstance(
831
+ dataset._session, Session
832
+ ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
768
833
  transform_kwargs = dict(
769
834
  session=dataset._session,
770
835
  dependencies=self._deps,
771
- drop_input_cols = self._drop_input_cols,
836
+ drop_input_cols=self._drop_input_cols,
772
837
  expected_output_cols_type="float",
773
838
  )
839
+ expected_output_cols = self._align_expected_output_names(
840
+ inference_method, dataset, expected_output_cols, output_cols_prefix
841
+ )
774
842
 
775
843
  elif isinstance(dataset, pd.DataFrame):
776
- transform_kwargs = dict(
777
- snowpark_input_cols = self._snowpark_cols,
778
- drop_input_cols = self._drop_input_cols
779
- )
844
+ transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
780
845
 
781
846
  transform_handlers = ModelTransformerBuilder.build(
782
847
  dataset=dataset,
@@ -789,7 +854,7 @@ class GaussianProcessClassifier(BaseTransformer):
789
854
  output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
790
855
  inference_method=inference_method,
791
856
  input_cols=self.input_cols,
792
- expected_output_cols=self._get_output_column_names(output_cols_prefix),
857
+ expected_output_cols=expected_output_cols,
793
858
  **transform_kwargs
794
859
  )
795
860
  return output_df
@@ -818,12 +883,14 @@ class GaussianProcessClassifier(BaseTransformer):
818
883
  Output dataset with probability of the sample for each class in the model.
819
884
  """
820
885
  super()._check_dataset_type(dataset)
821
- inference_method="score_samples"
886
+ inference_method = "score_samples"
822
887
 
823
888
  # This dictionary contains optional kwargs for batch inference. These kwargs
824
889
  # are specific to the type of dataset used.
825
890
  transform_kwargs: BatchInferenceKwargsTypedDict = dict()
826
891
 
892
+ expected_output_cols = self._get_output_column_names(output_cols_prefix)
893
+
827
894
  if isinstance(dataset, DataFrame):
828
895
  self._deps = self._batch_inference_validate_snowpark(
829
896
  dataset=dataset,
@@ -836,6 +903,9 @@ class GaussianProcessClassifier(BaseTransformer):
836
903
  drop_input_cols = self._drop_input_cols,
837
904
  expected_output_cols_type="float",
838
905
  )
906
+ expected_output_cols = self._align_expected_output_names(
907
+ inference_method, dataset, expected_output_cols, output_cols_prefix
908
+ )
839
909
 
840
910
  elif isinstance(dataset, pd.DataFrame):
841
911
  transform_kwargs = dict(
@@ -854,7 +924,7 @@ class GaussianProcessClassifier(BaseTransformer):
854
924
  output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
855
925
  inference_method=inference_method,
856
926
  input_cols=self.input_cols,
857
- expected_output_cols=self._get_output_column_names(output_cols_prefix),
927
+ expected_output_cols=expected_output_cols,
858
928
  **transform_kwargs
859
929
  )
860
930
  return output_df
@@ -1001,50 +1071,84 @@ class GaussianProcessClassifier(BaseTransformer):
1001
1071
  )
1002
1072
  return output_df
1003
1073
 
1074
+
1075
+
1076
+ def to_sklearn(self) -> Any:
1077
+ """Get sklearn.gaussian_process.GaussianProcessClassifier object.
1078
+ """
1079
+ if self._sklearn_object is None:
1080
+ self._sklearn_object = self._create_sklearn_object()
1081
+ return self._sklearn_object
1082
+
1083
+ def to_xgboost(self) -> Any:
1084
+ raise exceptions.SnowflakeMLException(
1085
+ error_code=error_codes.METHOD_NOT_ALLOWED,
1086
+ original_exception=AttributeError(
1087
+ modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
1088
+ "to_xgboost()",
1089
+ "to_sklearn()"
1090
+ )
1091
+ ),
1092
+ )
1093
+
1094
+ def to_lightgbm(self) -> Any:
1095
+ raise exceptions.SnowflakeMLException(
1096
+ error_code=error_codes.METHOD_NOT_ALLOWED,
1097
+ original_exception=AttributeError(
1098
+ modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
1099
+ "to_lightgbm()",
1100
+ "to_sklearn()"
1101
+ )
1102
+ ),
1103
+ )
1004
1104
 
1005
- def _get_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
1105
+ def _get_dependencies(self) -> List[str]:
1106
+ return self._deps
1107
+
1108
+
1109
+ def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
1006
1110
  self._model_signature_dict = dict()
1007
1111
 
1008
1112
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1009
1113
 
1010
- inputs = list(_infer_signature(dataset[self.input_cols], "input"))
1114
+ inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1011
1115
  outputs: List[BaseFeatureSpec] = []
1012
1116
  if hasattr(self, "predict"):
1013
1117
  # keep mypy happy
1014
- assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
1118
+ assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
1015
1119
  # For classifier, the type of predict is the same as the type of label
1016
- if self._sklearn_object._estimator_type == 'classifier':
1017
- # label columns is the desired type for output
1120
+ if self._sklearn_object._estimator_type == "classifier":
1121
+ # label columns is the desired type for output
1018
1122
  outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1019
1123
  # rename the output columns
1020
1124
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1021
- self._model_signature_dict["predict"] = ModelSignature(inputs,
1022
- ([] if self._drop_input_cols else inputs)
1023
- + outputs)
1125
+ self._model_signature_dict["predict"] = ModelSignature(
1126
+ inputs, ([] if self._drop_input_cols else inputs) + outputs
1127
+ )
1024
1128
  # For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
1025
1129
  # For outlier models, returns -1 for outliers and 1 for inliers.
1026
- # Clusterer returns int64 cluster labels.
1130
+ # Clusterer returns int64 cluster labels.
1027
1131
  elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
1028
1132
  outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
1029
- self._model_signature_dict["predict"] = ModelSignature(inputs,
1030
- ([] if self._drop_input_cols else inputs)
1031
- + outputs)
1032
-
1133
+ self._model_signature_dict["predict"] = ModelSignature(
1134
+ inputs, ([] if self._drop_input_cols else inputs) + outputs
1135
+ )
1136
+
1033
1137
  # For regressor, the type of predict is float64
1034
- elif self._sklearn_object._estimator_type == 'regressor':
1138
+ elif self._sklearn_object._estimator_type == "regressor":
1035
1139
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1036
- self._model_signature_dict["predict"] = ModelSignature(inputs,
1037
- ([] if self._drop_input_cols else inputs)
1038
- + outputs)
1039
-
1140
+ self._model_signature_dict["predict"] = ModelSignature(
1141
+ inputs, ([] if self._drop_input_cols else inputs) + outputs
1142
+ )
1143
+
1040
1144
  for prob_func in PROB_FUNCTIONS:
1041
1145
  if hasattr(self, prob_func):
1042
1146
  output_cols_prefix: str = f"{prob_func}_"
1043
1147
  output_column_names = self._get_output_column_names(output_cols_prefix)
1044
1148
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1045
- self._model_signature_dict[prob_func] = ModelSignature(inputs,
1046
- ([] if self._drop_input_cols else inputs)
1047
- + outputs)
1149
+ self._model_signature_dict[prob_func] = ModelSignature(
1150
+ inputs, ([] if self._drop_input_cols else inputs) + outputs
1151
+ )
1048
1152
 
1049
1153
  # Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
1050
1154
  items = list(self._model_signature_dict.items())
@@ -1057,10 +1161,10 @@ class GaussianProcessClassifier(BaseTransformer):
1057
1161
  """Returns model signature of current class.
1058
1162
 
1059
1163
  Raises:
1060
- exceptions.SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
1164
+ SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
1061
1165
 
1062
1166
  Returns:
1063
- Dict[str, ModelSignature]: each method and its input output signature
1167
+ Dict with each method and its input output signature
1064
1168
  """
1065
1169
  if self._model_signature_dict is None:
1066
1170
  raise exceptions.SnowflakeMLException(
@@ -1068,35 +1172,3 @@ class GaussianProcessClassifier(BaseTransformer):
1068
1172
  original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
1069
1173
  )
1070
1174
  return self._model_signature_dict
1071
-
1072
- def to_sklearn(self) -> Any:
1073
- """Get sklearn.gaussian_process.GaussianProcessClassifier object.
1074
- """
1075
- if self._sklearn_object is None:
1076
- self._sklearn_object = self._create_sklearn_object()
1077
- return self._sklearn_object
1078
-
1079
- def to_xgboost(self) -> Any:
1080
- raise exceptions.SnowflakeMLException(
1081
- error_code=error_codes.METHOD_NOT_ALLOWED,
1082
- original_exception=AttributeError(
1083
- modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
1084
- "to_xgboost()",
1085
- "to_sklearn()"
1086
- )
1087
- ),
1088
- )
1089
-
1090
- def to_lightgbm(self) -> Any:
1091
- raise exceptions.SnowflakeMLException(
1092
- error_code=error_codes.METHOD_NOT_ALLOWED,
1093
- original_exception=AttributeError(
1094
- modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
1095
- "to_lightgbm()",
1096
- "to_sklearn()"
1097
- )
1098
- ),
1099
- )
1100
-
1101
- def _get_dependencies(self) -> List[str]:
1102
- return self._deps