snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py

@@ -7,6 +7,7 @@
  #
  import inspect
  import os
+ import posixpath
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
  from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
  from snowflake.snowpark import DataFrame, Session
  from snowflake.snowpark.functions import pandas_udf, sproc
  from snowflake.snowpark.types import PandasSeries
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
  from snowflake.ml.model.model_signature import (
  DataType,
@@ -360,7 +362,6 @@ class GradientBoostingClassifier(BaseTransformer):
  sample_weight_col: Optional[str] = None,
  ) -> None:
  super().__init__()
- self.id = str(uuid4()).replace("-", "_").upper()
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
  self._deps = list(deps)
@@ -399,6 +400,15 @@ class GradientBoostingClassifier(BaseTransformer):
  self.set_drop_input_cols(drop_input_cols)
  self.set_sample_weight_col(sample_weight_col)
 
+ def _get_rand_id(self) -> str:
+ """
+ Generate random id to be used in sproc and stage names.
+
+ Returns:
+ Random id string usable in sproc, table, and stage names.
+ """
+ return str(uuid4()).replace("-", "_").upper()
+
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
  """
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -477,7 +487,7 @@ class GradientBoostingClassifier(BaseTransformer):
  cp.dump(self._sklearn_object, local_transform_file)
 
  # Create temp stage to run fit.
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
  SqlResultValidator(
  session=session,
@@ -490,11 +500,12 @@ class GradientBoostingClassifier(BaseTransformer):
  expected_value=f"Stage area {transform_stage_name} successfully created."
  ).validate()
 
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ # Use posixpath to construct stage paths
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
  local_result_file_name = get_temp_file_path()
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -520,6 +531,7 @@ class GradientBoostingClassifier(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def fit_wrapper_sproc(
  session: Session,
@@ -528,7 +540,8 @@ class GradientBoostingClassifier(BaseTransformer):
  stage_result_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> str:
  import cloudpickle as cp
  import numpy as np
@@ -595,15 +608,15 @@ class GradientBoostingClassifier(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- sproc_export_file_name = session.call(
- fit_sproc_name,
+ sproc_export_file_name = fit_wrapper_sproc(
+ session,
  query,
  stage_transform_file_name,
  stage_result_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )
 
  if "|" in sproc_export_file_name:
@@ -613,7 +626,7 @@ class GradientBoostingClassifier(BaseTransformer):
  print("\n".join(fields[1:]))
 
  session.file.get(
- os.path.join(stage_result_file_name, sproc_export_file_name),
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
  local_result_file_name,
  statement_params=statement_params
  )
@@ -659,7 +672,7 @@ class GradientBoostingClassifier(BaseTransformer):
 
  # Register vectorized UDF for batch inference
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
- safe_id=self.id, method=inference_method)
+ safe_id=self._get_rand_id(), method=inference_method)
 
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
  # will try to pickle all of self which fails.
@@ -751,7 +764,7 @@ class GradientBoostingClassifier(BaseTransformer):
  return transformed_pandas_df.to_dict("records")
 
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
- safe_id=self.id
+ safe_id=self._get_rand_id()
  )
 
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -918,11 +931,18 @@ class GradientBoostingClassifier(BaseTransformer):
  Transformed dataset.
  """
  if isinstance(dataset, DataFrame):
+ expected_type_inferred = ""
+ # when it is classifier, infer the datatype from label columns
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
+ expected_type_inferred = convert_sp_to_sf_type(
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
+ )
+
  output_df = self._batch_inference(
  dataset=dataset,
  inference_method="predict",
  expected_output_cols_list=self.output_cols,
- expected_output_cols_type="",
+ expected_output_cols_type=expected_type_inferred,
  )
  elif isinstance(dataset, pd.DataFrame):
  output_df = self._sklearn_inference(
@@ -993,10 +1013,10 @@ class GradientBoostingClassifier(BaseTransformer):
 
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns an empty list if current object is not a classifier or not yet fitted.
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
  """
  if getattr(self._sklearn_object, "classes_", None) is None:
- return []
+ return [output_cols_prefix]
 
  classes = self._sklearn_object.classes_
  if isinstance(classes, numpy.ndarray):
@@ -1227,7 +1247,7 @@ class GradientBoostingClassifier(BaseTransformer):
  cp.dump(self._sklearn_object, local_score_file)
 
  # Create temp stage to run score.
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  session = dataset._session
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
  SqlResultValidator(
@@ -1241,8 +1261,9 @@ class GradientBoostingClassifier(BaseTransformer):
  expected_value=f"Stage area {score_stage_name} successfully created."
  ).validate()
 
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ # Use posixpath to construct stage paths
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -1268,6 +1289,7 @@ class GradientBoostingClassifier(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def score_wrapper_sproc(
  session: Session,
@@ -1275,7 +1297,8 @@ class GradientBoostingClassifier(BaseTransformer):
  stage_score_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> float:
  import cloudpickle as cp
  import numpy as np
@@ -1325,14 +1348,14 @@ class GradientBoostingClassifier(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- score = session.call(
- score_sproc_name,
+ score = score_wrapper_sproc(
+ session,
  query,
  stage_score_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )
 
  cleanup_temp_files([local_score_file_name])
@@ -1350,18 +1373,20 @@ class GradientBoostingClassifier(BaseTransformer):
  if self._sklearn_object._estimator_type == 'classifier':
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  # For regressor, the type of predict is float64
  elif self._sklearn_object._estimator_type == 'regressor':
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  for prob_func in PROB_FUNCTIONS:
  if hasattr(self, prob_func):
  output_cols_prefix: str = f"{prob_func}_"
  output_column_names = self._get_output_column_names(output_cols_prefix)
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
 
  @property
  def model_signatures(self) -> Dict[str, ModelSignature]:
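
Across this file the generated code makes three recurring changes: stage, table, and sproc names take a fresh random suffix from the new _get_rand_id() helper instead of the per-instance self.id; the fit/score stored procedures are registered with anonymous=True and invoked through the returned handle rather than session.call; and stage paths are joined with posixpath instead of os.path. A minimal standalone sketch of the path change (the stage and file names below are illustrative, not taken from the package) shows why posixpath is the safer choice for stage locations, which are slash-separated regardless of the client OS:

import ntpath      # Windows-style join, used here only to show the contrast portably
import posixpath

stage_name = "SNOWML_TRANSFORM_ABC123"   # hypothetical temporary stage name
file_name = "model.pkl.zip"              # hypothetical staged artifact

print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl.zip
print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl.zip
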
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py

@@ -7,6 +7,7 @@
  #
  import inspect
  import os
+ import posixpath
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
  from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
  from snowflake.snowpark import DataFrame, Session
  from snowflake.snowpark.functions import pandas_udf, sproc
  from snowflake.snowpark.types import PandasSeries
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
  from snowflake.ml.model.model_signature import (
  DataType,
@@ -368,7 +370,6 @@ class GradientBoostingRegressor(BaseTransformer):
  sample_weight_col: Optional[str] = None,
  ) -> None:
  super().__init__()
- self.id = str(uuid4()).replace("-", "_").upper()
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
  self._deps = list(deps)
@@ -408,6 +409,15 @@ class GradientBoostingRegressor(BaseTransformer):
  self.set_drop_input_cols(drop_input_cols)
  self.set_sample_weight_col(sample_weight_col)
 
+ def _get_rand_id(self) -> str:
+ """
+ Generate random id to be used in sproc and stage names.
+
+ Returns:
+ Random id string usable in sproc, table, and stage names.
+ """
+ return str(uuid4()).replace("-", "_").upper()
+
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
  """
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -486,7 +496,7 @@ class GradientBoostingRegressor(BaseTransformer):
  cp.dump(self._sklearn_object, local_transform_file)
 
  # Create temp stage to run fit.
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
  SqlResultValidator(
  session=session,
@@ -499,11 +509,12 @@ class GradientBoostingRegressor(BaseTransformer):
  expected_value=f"Stage area {transform_stage_name} successfully created."
  ).validate()
 
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ # Use posixpath to construct stage paths
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
  local_result_file_name = get_temp_file_path()
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -529,6 +540,7 @@ class GradientBoostingRegressor(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def fit_wrapper_sproc(
  session: Session,
@@ -537,7 +549,8 @@ class GradientBoostingRegressor(BaseTransformer):
  stage_result_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> str:
  import cloudpickle as cp
  import numpy as np
@@ -604,15 +617,15 @@ class GradientBoostingRegressor(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- sproc_export_file_name = session.call(
- fit_sproc_name,
+ sproc_export_file_name = fit_wrapper_sproc(
+ session,
  query,
  stage_transform_file_name,
  stage_result_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )
 
  if "|" in sproc_export_file_name:
@@ -622,7 +635,7 @@ class GradientBoostingRegressor(BaseTransformer):
  print("\n".join(fields[1:]))
 
  session.file.get(
- os.path.join(stage_result_file_name, sproc_export_file_name),
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
  local_result_file_name,
  statement_params=statement_params
  )
@@ -668,7 +681,7 @@ class GradientBoostingRegressor(BaseTransformer):
 
  # Register vectorized UDF for batch inference
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
- safe_id=self.id, method=inference_method)
+ safe_id=self._get_rand_id(), method=inference_method)
 
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
  # will try to pickle all of self which fails.
@@ -760,7 +773,7 @@ class GradientBoostingRegressor(BaseTransformer):
  return transformed_pandas_df.to_dict("records")
 
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
- safe_id=self.id
+ safe_id=self._get_rand_id()
  )
 
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -927,11 +940,18 @@ class GradientBoostingRegressor(BaseTransformer):
  Transformed dataset.
  """
  if isinstance(dataset, DataFrame):
+ expected_type_inferred = "float"
+ # when it is classifier, infer the datatype from label columns
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
+ expected_type_inferred = convert_sp_to_sf_type(
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
+ )
+
  output_df = self._batch_inference(
  dataset=dataset,
  inference_method="predict",
  expected_output_cols_list=self.output_cols,
- expected_output_cols_type="float",
+ expected_output_cols_type=expected_type_inferred,
  )
  elif isinstance(dataset, pd.DataFrame):
  output_df = self._sklearn_inference(
@@ -1002,10 +1022,10 @@ class GradientBoostingRegressor(BaseTransformer):
 
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns an empty list if current object is not a classifier or not yet fitted.
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
  """
  if getattr(self._sklearn_object, "classes_", None) is None:
- return []
+ return [output_cols_prefix]
 
  classes = self._sklearn_object.classes_
  if isinstance(classes, numpy.ndarray):
@@ -1230,7 +1250,7 @@ class GradientBoostingRegressor(BaseTransformer):
  cp.dump(self._sklearn_object, local_score_file)
 
  # Create temp stage to run score.
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  session = dataset._session
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
  SqlResultValidator(
@@ -1244,8 +1264,9 @@ class GradientBoostingRegressor(BaseTransformer):
  expected_value=f"Stage area {score_stage_name} successfully created."
  ).validate()
 
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ # Use posixpath to construct stage paths
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -1271,6 +1292,7 @@ class GradientBoostingRegressor(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def score_wrapper_sproc(
  session: Session,
@@ -1278,7 +1300,8 @@ class GradientBoostingRegressor(BaseTransformer):
  stage_score_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> float:
  import cloudpickle as cp
  import numpy as np
@@ -1328,14 +1351,14 @@ class GradientBoostingRegressor(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- score = session.call(
- score_sproc_name,
+ score = score_wrapper_sproc(
+ session,
  query,
  stage_score_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )
 
  cleanup_temp_files([local_score_file_name])
@@ -1353,18 +1376,20 @@ class GradientBoostingRegressor(BaseTransformer):
  if self._sklearn_object._estimator_type == 'classifier':
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  # For regressor, the type of predict is float64
  elif self._sklearn_object._estimator_type == 'regressor':
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  for prob_func in PROB_FUNCTIONS:
  if hasattr(self, prob_func):
  output_cols_prefix: str = f"{prob_func}_"
  output_column_names = self._get_output_column_names(output_cols_prefix)
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
 
  @property
  def model_signatures(self) -> Dict[str, ModelSignature]:
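
The regressor diff mirrors the classifier diff above hunk for hunk; apart from line offsets, the visible difference is that expected_type_inferred starts as "float" here (the previously hard-coded expected_output_cols_type) rather than "". The other shared change, replacing the id fixed in __init__ with a per-name random suffix, is easiest to see in isolation. A minimal sketch with the _get_rand_id helper lifted out of the class for illustration:

from uuid import uuid4

def _get_rand_id() -> str:
    """Generate a random id usable in sproc, table, and stage names."""
    return str(uuid4()).replace("-", "_").upper()

# Each generated name now draws its own suffix instead of reusing one id set at construction time.
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=_get_rand_id())
print(transform_stage_name)
print(fit_sproc_name)  # a different suffix from the stage name, since every call is a new uuid4
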