snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -270,7 +272,6 @@ class GaussianProcessClassifier(BaseTransformer):
270
272
  sample_weight_col: Optional[str] = None,
271
273
  ) -> None:
272
274
  super().__init__()
273
- self.id = str(uuid4()).replace("-", "_").upper()
274
275
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
275
276
 
276
277
  self._deps = list(deps)
@@ -298,6 +299,15 @@ class GaussianProcessClassifier(BaseTransformer):
298
299
  self.set_drop_input_cols(drop_input_cols)
299
300
  self.set_sample_weight_col(sample_weight_col)
300
301
 
302
+ def _get_rand_id(self) -> str:
303
+ """
304
+ Generate random id to be used in sproc and stage names.
305
+
306
+ Returns:
307
+ Random id string usable in sproc, table, and stage names.
308
+ """
309
+ return str(uuid4()).replace("-", "_").upper()
310
+
301
311
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
302
312
  """
303
313
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -376,7 +386,7 @@ class GaussianProcessClassifier(BaseTransformer):
376
386
  cp.dump(self._sklearn_object, local_transform_file)
377
387
 
378
388
  # Create temp stage to run fit.
379
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
389
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
380
390
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
381
391
  SqlResultValidator(
382
392
  session=session,
@@ -389,11 +399,12 @@ class GaussianProcessClassifier(BaseTransformer):
389
399
  expected_value=f"Stage area {transform_stage_name} successfully created."
390
400
  ).validate()
391
401
 
392
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
402
+ # Use posixpath to construct stage paths
403
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
404
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
393
405
  local_result_file_name = get_temp_file_path()
394
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
395
406
 
396
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
407
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
397
408
  statement_params = telemetry.get_function_usage_statement_params(
398
409
  project=_PROJECT,
399
410
  subproject=_SUBPROJECT,
@@ -419,6 +430,7 @@ class GaussianProcessClassifier(BaseTransformer):
419
430
  replace=True,
420
431
  session=session,
421
432
  statement_params=statement_params,
433
+ anonymous=True
422
434
  )
423
435
  def fit_wrapper_sproc(
424
436
  session: Session,
@@ -427,7 +439,8 @@ class GaussianProcessClassifier(BaseTransformer):
427
439
  stage_result_file_name: str,
428
440
  input_cols: List[str],
429
441
  label_cols: List[str],
430
- sample_weight_col: Optional[str]
442
+ sample_weight_col: Optional[str],
443
+ statement_params: Dict[str, str]
431
444
  ) -> str:
432
445
  import cloudpickle as cp
433
446
  import numpy as np
@@ -494,15 +507,15 @@ class GaussianProcessClassifier(BaseTransformer):
494
507
  api_calls=[Session.call],
495
508
  custom_tags=dict([("autogen", True)]),
496
509
  )
497
- sproc_export_file_name = session.call(
498
- fit_sproc_name,
510
+ sproc_export_file_name = fit_wrapper_sproc(
511
+ session,
499
512
  query,
500
513
  stage_transform_file_name,
501
514
  stage_result_file_name,
502
515
  identifier.get_unescaped_names(self.input_cols),
503
516
  identifier.get_unescaped_names(self.label_cols),
504
517
  identifier.get_unescaped_names(self.sample_weight_col),
505
- statement_params=statement_params,
518
+ statement_params,
506
519
  )
507
520
 
508
521
  if "|" in sproc_export_file_name:
@@ -512,7 +525,7 @@ class GaussianProcessClassifier(BaseTransformer):
512
525
  print("\n".join(fields[1:]))
513
526
 
514
527
  session.file.get(
515
- os.path.join(stage_result_file_name, sproc_export_file_name),
528
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
516
529
  local_result_file_name,
517
530
  statement_params=statement_params
518
531
  )
@@ -558,7 +571,7 @@ class GaussianProcessClassifier(BaseTransformer):
558
571
 
559
572
  # Register vectorized UDF for batch inference
560
573
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
561
- safe_id=self.id, method=inference_method)
574
+ safe_id=self._get_rand_id(), method=inference_method)
562
575
 
563
576
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
564
577
  # will try to pickle all of self which fails.
@@ -650,7 +663,7 @@ class GaussianProcessClassifier(BaseTransformer):
650
663
  return transformed_pandas_df.to_dict("records")
651
664
 
652
665
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
653
- safe_id=self.id
666
+ safe_id=self._get_rand_id()
654
667
  )
655
668
 
656
669
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -817,11 +830,18 @@ class GaussianProcessClassifier(BaseTransformer):
817
830
  Transformed dataset.
818
831
  """
819
832
  if isinstance(dataset, DataFrame):
833
+ expected_type_inferred = ""
834
+ # when it is classifier, infer the datatype from label columns
835
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
836
+ expected_type_inferred = convert_sp_to_sf_type(
837
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
838
+ )
839
+
820
840
  output_df = self._batch_inference(
821
841
  dataset=dataset,
822
842
  inference_method="predict",
823
843
  expected_output_cols_list=self.output_cols,
824
- expected_output_cols_type="",
844
+ expected_output_cols_type=expected_type_inferred,
825
845
  )
826
846
  elif isinstance(dataset, pd.DataFrame):
827
847
  output_df = self._sklearn_inference(
@@ -892,10 +912,10 @@ class GaussianProcessClassifier(BaseTransformer):
892
912
 
893
913
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
894
914
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
895
- Returns an empty list if current object is not a classifier or not yet fitted.
915
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
896
916
  """
897
917
  if getattr(self._sklearn_object, "classes_", None) is None:
898
- return []
918
+ return [output_cols_prefix]
899
919
 
900
920
  classes = self._sklearn_object.classes_
901
921
  if isinstance(classes, numpy.ndarray):
@@ -1124,7 +1144,7 @@ class GaussianProcessClassifier(BaseTransformer):
1124
1144
  cp.dump(self._sklearn_object, local_score_file)
1125
1145
 
1126
1146
  # Create temp stage to run score.
1127
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1147
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1128
1148
  session = dataset._session
1129
1149
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1130
1150
  SqlResultValidator(
@@ -1138,8 +1158,9 @@ class GaussianProcessClassifier(BaseTransformer):
1138
1158
  expected_value=f"Stage area {score_stage_name} successfully created."
1139
1159
  ).validate()
1140
1160
 
1141
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1142
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1161
+ # Use posixpath to construct stage paths
1162
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1163
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1143
1164
  statement_params = telemetry.get_function_usage_statement_params(
1144
1165
  project=_PROJECT,
1145
1166
  subproject=_SUBPROJECT,
@@ -1165,6 +1186,7 @@ class GaussianProcessClassifier(BaseTransformer):
1165
1186
  replace=True,
1166
1187
  session=session,
1167
1188
  statement_params=statement_params,
1189
+ anonymous=True
1168
1190
  )
1169
1191
  def score_wrapper_sproc(
1170
1192
  session: Session,
@@ -1172,7 +1194,8 @@ class GaussianProcessClassifier(BaseTransformer):
1172
1194
  stage_score_file_name: str,
1173
1195
  input_cols: List[str],
1174
1196
  label_cols: List[str],
1175
- sample_weight_col: Optional[str]
1197
+ sample_weight_col: Optional[str],
1198
+ statement_params: Dict[str, str]
1176
1199
  ) -> float:
1177
1200
  import cloudpickle as cp
1178
1201
  import numpy as np
@@ -1222,14 +1245,14 @@ class GaussianProcessClassifier(BaseTransformer):
1222
1245
  api_calls=[Session.call],
1223
1246
  custom_tags=dict([("autogen", True)]),
1224
1247
  )
1225
- score = session.call(
1226
- score_sproc_name,
1248
+ score = score_wrapper_sproc(
1249
+ session,
1227
1250
  query,
1228
1251
  stage_score_file_name,
1229
1252
  identifier.get_unescaped_names(self.input_cols),
1230
1253
  identifier.get_unescaped_names(self.label_cols),
1231
1254
  identifier.get_unescaped_names(self.sample_weight_col),
1232
- statement_params=statement_params,
1255
+ statement_params,
1233
1256
  )
1234
1257
 
1235
1258
  cleanup_temp_files([local_score_file_name])
@@ -1247,18 +1270,20 @@ class GaussianProcessClassifier(BaseTransformer):
1247
1270
  if self._sklearn_object._estimator_type == 'classifier':
1248
1271
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1249
1272
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1250
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1273
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1274
+ ([] if self._drop_input_cols else inputs) + outputs)
1251
1275
  # For regressor, the type of predict is float64
1252
1276
  elif self._sklearn_object._estimator_type == 'regressor':
1253
1277
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1254
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1255
-
1278
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1279
+ ([] if self._drop_input_cols else inputs) + outputs)
1256
1280
  for prob_func in PROB_FUNCTIONS:
1257
1281
  if hasattr(self, prob_func):
1258
1282
  output_cols_prefix: str = f"{prob_func}_"
1259
1283
  output_column_names = self._get_output_column_names(output_cols_prefix)
1260
1284
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1261
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1285
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1286
+ ([] if self._drop_input_cols else inputs) + outputs)
1262
1287
 
1263
1288
  @property
1264
1289
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -255,7 +257,6 @@ class GaussianProcessRegressor(BaseTransformer):
255
257
  sample_weight_col: Optional[str] = None,
256
258
  ) -> None:
257
259
  super().__init__()
258
- self.id = str(uuid4()).replace("-", "_").upper()
259
260
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
260
261
 
261
262
  self._deps = list(deps)
@@ -281,6 +282,15 @@ class GaussianProcessRegressor(BaseTransformer):
281
282
  self.set_drop_input_cols(drop_input_cols)
282
283
  self.set_sample_weight_col(sample_weight_col)
283
284
 
285
+ def _get_rand_id(self) -> str:
286
+ """
287
+ Generate random id to be used in sproc and stage names.
288
+
289
+ Returns:
290
+ Random id string usable in sproc, table, and stage names.
291
+ """
292
+ return str(uuid4()).replace("-", "_").upper()
293
+
284
294
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
285
295
  """
286
296
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -359,7 +369,7 @@ class GaussianProcessRegressor(BaseTransformer):
359
369
  cp.dump(self._sklearn_object, local_transform_file)
360
370
 
361
371
  # Create temp stage to run fit.
362
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
372
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
363
373
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
364
374
  SqlResultValidator(
365
375
  session=session,
@@ -372,11 +382,12 @@ class GaussianProcessRegressor(BaseTransformer):
372
382
  expected_value=f"Stage area {transform_stage_name} successfully created."
373
383
  ).validate()
374
384
 
375
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
385
+ # Use posixpath to construct stage paths
386
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
387
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
376
388
  local_result_file_name = get_temp_file_path()
377
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
378
389
 
379
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
390
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
380
391
  statement_params = telemetry.get_function_usage_statement_params(
381
392
  project=_PROJECT,
382
393
  subproject=_SUBPROJECT,
@@ -402,6 +413,7 @@ class GaussianProcessRegressor(BaseTransformer):
402
413
  replace=True,
403
414
  session=session,
404
415
  statement_params=statement_params,
416
+ anonymous=True
405
417
  )
406
418
  def fit_wrapper_sproc(
407
419
  session: Session,
@@ -410,7 +422,8 @@ class GaussianProcessRegressor(BaseTransformer):
410
422
  stage_result_file_name: str,
411
423
  input_cols: List[str],
412
424
  label_cols: List[str],
413
- sample_weight_col: Optional[str]
425
+ sample_weight_col: Optional[str],
426
+ statement_params: Dict[str, str]
414
427
  ) -> str:
415
428
  import cloudpickle as cp
416
429
  import numpy as np
@@ -477,15 +490,15 @@ class GaussianProcessRegressor(BaseTransformer):
477
490
  api_calls=[Session.call],
478
491
  custom_tags=dict([("autogen", True)]),
479
492
  )
480
- sproc_export_file_name = session.call(
481
- fit_sproc_name,
493
+ sproc_export_file_name = fit_wrapper_sproc(
494
+ session,
482
495
  query,
483
496
  stage_transform_file_name,
484
497
  stage_result_file_name,
485
498
  identifier.get_unescaped_names(self.input_cols),
486
499
  identifier.get_unescaped_names(self.label_cols),
487
500
  identifier.get_unescaped_names(self.sample_weight_col),
488
- statement_params=statement_params,
501
+ statement_params,
489
502
  )
490
503
 
491
504
  if "|" in sproc_export_file_name:
@@ -495,7 +508,7 @@ class GaussianProcessRegressor(BaseTransformer):
495
508
  print("\n".join(fields[1:]))
496
509
 
497
510
  session.file.get(
498
- os.path.join(stage_result_file_name, sproc_export_file_name),
511
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
499
512
  local_result_file_name,
500
513
  statement_params=statement_params
501
514
  )
@@ -541,7 +554,7 @@ class GaussianProcessRegressor(BaseTransformer):
541
554
 
542
555
  # Register vectorized UDF for batch inference
543
556
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
544
- safe_id=self.id, method=inference_method)
557
+ safe_id=self._get_rand_id(), method=inference_method)
545
558
 
546
559
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
547
560
  # will try to pickle all of self which fails.
@@ -633,7 +646,7 @@ class GaussianProcessRegressor(BaseTransformer):
633
646
  return transformed_pandas_df.to_dict("records")
634
647
 
635
648
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
636
- safe_id=self.id
649
+ safe_id=self._get_rand_id()
637
650
  )
638
651
 
639
652
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -800,11 +813,18 @@ class GaussianProcessRegressor(BaseTransformer):
800
813
  Transformed dataset.
801
814
  """
802
815
  if isinstance(dataset, DataFrame):
816
+ expected_type_inferred = "float"
817
+ # when it is classifier, infer the datatype from label columns
818
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
819
+ expected_type_inferred = convert_sp_to_sf_type(
820
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
821
+ )
822
+
803
823
  output_df = self._batch_inference(
804
824
  dataset=dataset,
805
825
  inference_method="predict",
806
826
  expected_output_cols_list=self.output_cols,
807
- expected_output_cols_type="float",
827
+ expected_output_cols_type=expected_type_inferred,
808
828
  )
809
829
  elif isinstance(dataset, pd.DataFrame):
810
830
  output_df = self._sklearn_inference(
@@ -875,10 +895,10 @@ class GaussianProcessRegressor(BaseTransformer):
875
895
 
876
896
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
877
897
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
878
- Returns an empty list if current object is not a classifier or not yet fitted.
898
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
879
899
  """
880
900
  if getattr(self._sklearn_object, "classes_", None) is None:
881
- return []
901
+ return [output_cols_prefix]
882
902
 
883
903
  classes = self._sklearn_object.classes_
884
904
  if isinstance(classes, numpy.ndarray):
@@ -1103,7 +1123,7 @@ class GaussianProcessRegressor(BaseTransformer):
1103
1123
  cp.dump(self._sklearn_object, local_score_file)
1104
1124
 
1105
1125
  # Create temp stage to run score.
1106
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1126
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1107
1127
  session = dataset._session
1108
1128
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1109
1129
  SqlResultValidator(
@@ -1117,8 +1137,9 @@ class GaussianProcessRegressor(BaseTransformer):
1117
1137
  expected_value=f"Stage area {score_stage_name} successfully created."
1118
1138
  ).validate()
1119
1139
 
1120
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1121
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1140
+ # Use posixpath to construct stage paths
1141
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1142
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1122
1143
  statement_params = telemetry.get_function_usage_statement_params(
1123
1144
  project=_PROJECT,
1124
1145
  subproject=_SUBPROJECT,
@@ -1144,6 +1165,7 @@ class GaussianProcessRegressor(BaseTransformer):
1144
1165
  replace=True,
1145
1166
  session=session,
1146
1167
  statement_params=statement_params,
1168
+ anonymous=True
1147
1169
  )
1148
1170
  def score_wrapper_sproc(
1149
1171
  session: Session,
@@ -1151,7 +1173,8 @@ class GaussianProcessRegressor(BaseTransformer):
1151
1173
  stage_score_file_name: str,
1152
1174
  input_cols: List[str],
1153
1175
  label_cols: List[str],
1154
- sample_weight_col: Optional[str]
1176
+ sample_weight_col: Optional[str],
1177
+ statement_params: Dict[str, str]
1155
1178
  ) -> float:
1156
1179
  import cloudpickle as cp
1157
1180
  import numpy as np
@@ -1201,14 +1224,14 @@ class GaussianProcessRegressor(BaseTransformer):
1201
1224
  api_calls=[Session.call],
1202
1225
  custom_tags=dict([("autogen", True)]),
1203
1226
  )
1204
- score = session.call(
1205
- score_sproc_name,
1227
+ score = score_wrapper_sproc(
1228
+ session,
1206
1229
  query,
1207
1230
  stage_score_file_name,
1208
1231
  identifier.get_unescaped_names(self.input_cols),
1209
1232
  identifier.get_unescaped_names(self.label_cols),
1210
1233
  identifier.get_unescaped_names(self.sample_weight_col),
1211
- statement_params=statement_params,
1234
+ statement_params,
1212
1235
  )
1213
1236
 
1214
1237
  cleanup_temp_files([local_score_file_name])
@@ -1226,18 +1249,20 @@ class GaussianProcessRegressor(BaseTransformer):
1226
1249
  if self._sklearn_object._estimator_type == 'classifier':
1227
1250
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1228
1251
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1229
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1252
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1253
+ ([] if self._drop_input_cols else inputs) + outputs)
1230
1254
  # For regressor, the type of predict is float64
1231
1255
  elif self._sklearn_object._estimator_type == 'regressor':
1232
1256
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1233
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1234
-
1257
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1258
+ ([] if self._drop_input_cols else inputs) + outputs)
1235
1259
  for prob_func in PROB_FUNCTIONS:
1236
1260
  if hasattr(self, prob_func):
1237
1261
  output_cols_prefix: str = f"{prob_func}_"
1238
1262
  output_column_names = self._get_output_column_names(output_cols_prefix)
1239
1263
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1240
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1264
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1265
+ ([] if self._drop_input_cols else inputs) + outputs)
1241
1266
 
1242
1267
  @property
1243
1268
  def model_signatures(self) -> Dict[str, ModelSignature]: