snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -338,7 +340,6 @@ class HistGradientBoostingClassifier(BaseTransformer):
338
340
  sample_weight_col: Optional[str] = None,
339
341
  ) -> None:
340
342
  super().__init__()
341
- self.id = str(uuid4()).replace("-", "_").upper()
342
343
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
343
344
 
344
345
  self._deps = list(deps)
@@ -377,6 +378,15 @@ class HistGradientBoostingClassifier(BaseTransformer):
377
378
  self.set_drop_input_cols(drop_input_cols)
378
379
  self.set_sample_weight_col(sample_weight_col)
379
380
 
381
+ def _get_rand_id(self) -> str:
382
+ """
383
+ Generate random id to be used in sproc and stage names.
384
+
385
+ Returns:
386
+ Random id string usable in sproc, table, and stage names.
387
+ """
388
+ return str(uuid4()).replace("-", "_").upper()
389
+
380
390
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
381
391
  """
382
392
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -455,7 +465,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
455
465
  cp.dump(self._sklearn_object, local_transform_file)
456
466
 
457
467
  # Create temp stage to run fit.
458
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
468
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
459
469
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
460
470
  SqlResultValidator(
461
471
  session=session,
@@ -468,11 +478,12 @@ class HistGradientBoostingClassifier(BaseTransformer):
468
478
  expected_value=f"Stage area {transform_stage_name} successfully created."
469
479
  ).validate()
470
480
 
471
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
481
+ # Use posixpath to construct stage paths
482
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
483
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
472
484
  local_result_file_name = get_temp_file_path()
473
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
474
485
 
475
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
486
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
476
487
  statement_params = telemetry.get_function_usage_statement_params(
477
488
  project=_PROJECT,
478
489
  subproject=_SUBPROJECT,
@@ -498,6 +509,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
498
509
  replace=True,
499
510
  session=session,
500
511
  statement_params=statement_params,
512
+ anonymous=True
501
513
  )
502
514
  def fit_wrapper_sproc(
503
515
  session: Session,
@@ -506,7 +518,8 @@ class HistGradientBoostingClassifier(BaseTransformer):
506
518
  stage_result_file_name: str,
507
519
  input_cols: List[str],
508
520
  label_cols: List[str],
509
- sample_weight_col: Optional[str]
521
+ sample_weight_col: Optional[str],
522
+ statement_params: Dict[str, str]
510
523
  ) -> str:
511
524
  import cloudpickle as cp
512
525
  import numpy as np
@@ -573,15 +586,15 @@ class HistGradientBoostingClassifier(BaseTransformer):
573
586
  api_calls=[Session.call],
574
587
  custom_tags=dict([("autogen", True)]),
575
588
  )
576
- sproc_export_file_name = session.call(
577
- fit_sproc_name,
589
+ sproc_export_file_name = fit_wrapper_sproc(
590
+ session,
578
591
  query,
579
592
  stage_transform_file_name,
580
593
  stage_result_file_name,
581
594
  identifier.get_unescaped_names(self.input_cols),
582
595
  identifier.get_unescaped_names(self.label_cols),
583
596
  identifier.get_unescaped_names(self.sample_weight_col),
584
- statement_params=statement_params,
597
+ statement_params,
585
598
  )
586
599
 
587
600
  if "|" in sproc_export_file_name:
@@ -591,7 +604,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
591
604
  print("\n".join(fields[1:]))
592
605
 
593
606
  session.file.get(
594
- os.path.join(stage_result_file_name, sproc_export_file_name),
607
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
595
608
  local_result_file_name,
596
609
  statement_params=statement_params
597
610
  )
@@ -637,7 +650,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
637
650
 
638
651
  # Register vectorized UDF for batch inference
639
652
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
640
- safe_id=self.id, method=inference_method)
653
+ safe_id=self._get_rand_id(), method=inference_method)
641
654
 
642
655
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
643
656
  # will try to pickle all of self which fails.
@@ -729,7 +742,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
729
742
  return transformed_pandas_df.to_dict("records")
730
743
 
731
744
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
732
- safe_id=self.id
745
+ safe_id=self._get_rand_id()
733
746
  )
734
747
 
735
748
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -896,11 +909,18 @@ class HistGradientBoostingClassifier(BaseTransformer):
896
909
  Transformed dataset.
897
910
  """
898
911
  if isinstance(dataset, DataFrame):
912
+ expected_type_inferred = ""
913
+ # when it is classifier, infer the datatype from label columns
914
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
915
+ expected_type_inferred = convert_sp_to_sf_type(
916
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
917
+ )
918
+
899
919
  output_df = self._batch_inference(
900
920
  dataset=dataset,
901
921
  inference_method="predict",
902
922
  expected_output_cols_list=self.output_cols,
903
- expected_output_cols_type="",
923
+ expected_output_cols_type=expected_type_inferred,
904
924
  )
905
925
  elif isinstance(dataset, pd.DataFrame):
906
926
  output_df = self._sklearn_inference(
@@ -971,10 +991,10 @@ class HistGradientBoostingClassifier(BaseTransformer):
971
991
 
972
992
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
973
993
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
974
- Returns an empty list if current object is not a classifier or not yet fitted.
994
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
975
995
  """
976
996
  if getattr(self._sklearn_object, "classes_", None) is None:
977
- return []
997
+ return [output_cols_prefix]
978
998
 
979
999
  classes = self._sklearn_object.classes_
980
1000
  if isinstance(classes, numpy.ndarray):
@@ -1205,7 +1225,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
1205
1225
  cp.dump(self._sklearn_object, local_score_file)
1206
1226
 
1207
1227
  # Create temp stage to run score.
1208
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1228
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1209
1229
  session = dataset._session
1210
1230
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1211
1231
  SqlResultValidator(
@@ -1219,8 +1239,9 @@ class HistGradientBoostingClassifier(BaseTransformer):
1219
1239
  expected_value=f"Stage area {score_stage_name} successfully created."
1220
1240
  ).validate()
1221
1241
 
1222
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1223
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1242
+ # Use posixpath to construct stage paths
1243
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1244
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1224
1245
  statement_params = telemetry.get_function_usage_statement_params(
1225
1246
  project=_PROJECT,
1226
1247
  subproject=_SUBPROJECT,
@@ -1246,6 +1267,7 @@ class HistGradientBoostingClassifier(BaseTransformer):
1246
1267
  replace=True,
1247
1268
  session=session,
1248
1269
  statement_params=statement_params,
1270
+ anonymous=True
1249
1271
  )
1250
1272
  def score_wrapper_sproc(
1251
1273
  session: Session,
@@ -1253,7 +1275,8 @@ class HistGradientBoostingClassifier(BaseTransformer):
1253
1275
  stage_score_file_name: str,
1254
1276
  input_cols: List[str],
1255
1277
  label_cols: List[str],
1256
- sample_weight_col: Optional[str]
1278
+ sample_weight_col: Optional[str],
1279
+ statement_params: Dict[str, str]
1257
1280
  ) -> float:
1258
1281
  import cloudpickle as cp
1259
1282
  import numpy as np
@@ -1303,14 +1326,14 @@ class HistGradientBoostingClassifier(BaseTransformer):
1303
1326
  api_calls=[Session.call],
1304
1327
  custom_tags=dict([("autogen", True)]),
1305
1328
  )
1306
- score = session.call(
1307
- score_sproc_name,
1329
+ score = score_wrapper_sproc(
1330
+ session,
1308
1331
  query,
1309
1332
  stage_score_file_name,
1310
1333
  identifier.get_unescaped_names(self.input_cols),
1311
1334
  identifier.get_unescaped_names(self.label_cols),
1312
1335
  identifier.get_unescaped_names(self.sample_weight_col),
1313
- statement_params=statement_params,
1336
+ statement_params,
1314
1337
  )
1315
1338
 
1316
1339
  cleanup_temp_files([local_score_file_name])
@@ -1328,18 +1351,20 @@ class HistGradientBoostingClassifier(BaseTransformer):
1328
1351
  if self._sklearn_object._estimator_type == 'classifier':
1329
1352
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1330
1353
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1331
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1354
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1355
+ ([] if self._drop_input_cols else inputs) + outputs)
1332
1356
  # For regressor, the type of predict is float64
1333
1357
  elif self._sklearn_object._estimator_type == 'regressor':
1334
1358
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1335
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1336
-
1359
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1360
+ ([] if self._drop_input_cols else inputs) + outputs)
1337
1361
  for prob_func in PROB_FUNCTIONS:
1338
1362
  if hasattr(self, prob_func):
1339
1363
  output_cols_prefix: str = f"{prob_func}_"
1340
1364
  output_column_names = self._get_output_column_names(output_cols_prefix)
1341
1365
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1342
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1366
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1367
+ ([] if self._drop_input_cols else inputs) + outputs)
1343
1368
 
1344
1369
  @property
1345
1370
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -328,7 +330,6 @@ class HistGradientBoostingRegressor(BaseTransformer):
328
330
  sample_weight_col: Optional[str] = None,
329
331
  ) -> None:
330
332
  super().__init__()
331
- self.id = str(uuid4()).replace("-", "_").upper()
332
333
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
333
334
 
334
335
  self._deps = list(deps)
@@ -367,6 +368,15 @@ class HistGradientBoostingRegressor(BaseTransformer):
367
368
  self.set_drop_input_cols(drop_input_cols)
368
369
  self.set_sample_weight_col(sample_weight_col)
369
370
 
371
+ def _get_rand_id(self) -> str:
372
+ """
373
+ Generate random id to be used in sproc and stage names.
374
+
375
+ Returns:
376
+ Random id string usable in sproc, table, and stage names.
377
+ """
378
+ return str(uuid4()).replace("-", "_").upper()
379
+
370
380
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
371
381
  """
372
382
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -445,7 +455,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
445
455
  cp.dump(self._sklearn_object, local_transform_file)
446
456
 
447
457
  # Create temp stage to run fit.
448
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
458
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
449
459
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
450
460
  SqlResultValidator(
451
461
  session=session,
@@ -458,11 +468,12 @@ class HistGradientBoostingRegressor(BaseTransformer):
458
468
  expected_value=f"Stage area {transform_stage_name} successfully created."
459
469
  ).validate()
460
470
 
461
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
471
+ # Use posixpath to construct stage paths
472
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
473
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
462
474
  local_result_file_name = get_temp_file_path()
463
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
464
475
 
465
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
476
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
466
477
  statement_params = telemetry.get_function_usage_statement_params(
467
478
  project=_PROJECT,
468
479
  subproject=_SUBPROJECT,
@@ -488,6 +499,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
488
499
  replace=True,
489
500
  session=session,
490
501
  statement_params=statement_params,
502
+ anonymous=True
491
503
  )
492
504
  def fit_wrapper_sproc(
493
505
  session: Session,
@@ -496,7 +508,8 @@ class HistGradientBoostingRegressor(BaseTransformer):
496
508
  stage_result_file_name: str,
497
509
  input_cols: List[str],
498
510
  label_cols: List[str],
499
- sample_weight_col: Optional[str]
511
+ sample_weight_col: Optional[str],
512
+ statement_params: Dict[str, str]
500
513
  ) -> str:
501
514
  import cloudpickle as cp
502
515
  import numpy as np
@@ -563,15 +576,15 @@ class HistGradientBoostingRegressor(BaseTransformer):
563
576
  api_calls=[Session.call],
564
577
  custom_tags=dict([("autogen", True)]),
565
578
  )
566
- sproc_export_file_name = session.call(
567
- fit_sproc_name,
579
+ sproc_export_file_name = fit_wrapper_sproc(
580
+ session,
568
581
  query,
569
582
  stage_transform_file_name,
570
583
  stage_result_file_name,
571
584
  identifier.get_unescaped_names(self.input_cols),
572
585
  identifier.get_unescaped_names(self.label_cols),
573
586
  identifier.get_unescaped_names(self.sample_weight_col),
574
- statement_params=statement_params,
587
+ statement_params,
575
588
  )
576
589
 
577
590
  if "|" in sproc_export_file_name:
@@ -581,7 +594,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
581
594
  print("\n".join(fields[1:]))
582
595
 
583
596
  session.file.get(
584
- os.path.join(stage_result_file_name, sproc_export_file_name),
597
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
585
598
  local_result_file_name,
586
599
  statement_params=statement_params
587
600
  )
@@ -627,7 +640,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
627
640
 
628
641
  # Register vectorized UDF for batch inference
629
642
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
630
- safe_id=self.id, method=inference_method)
643
+ safe_id=self._get_rand_id(), method=inference_method)
631
644
 
632
645
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
633
646
  # will try to pickle all of self which fails.
@@ -719,7 +732,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
719
732
  return transformed_pandas_df.to_dict("records")
720
733
 
721
734
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
722
- safe_id=self.id
735
+ safe_id=self._get_rand_id()
723
736
  )
724
737
 
725
738
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -886,11 +899,18 @@ class HistGradientBoostingRegressor(BaseTransformer):
886
899
  Transformed dataset.
887
900
  """
888
901
  if isinstance(dataset, DataFrame):
902
+ expected_type_inferred = "float"
903
+ # when it is classifier, infer the datatype from label columns
904
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
905
+ expected_type_inferred = convert_sp_to_sf_type(
906
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
907
+ )
908
+
889
909
  output_df = self._batch_inference(
890
910
  dataset=dataset,
891
911
  inference_method="predict",
892
912
  expected_output_cols_list=self.output_cols,
893
- expected_output_cols_type="float",
913
+ expected_output_cols_type=expected_type_inferred,
894
914
  )
895
915
  elif isinstance(dataset, pd.DataFrame):
896
916
  output_df = self._sklearn_inference(
@@ -961,10 +981,10 @@ class HistGradientBoostingRegressor(BaseTransformer):
961
981
 
962
982
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
963
983
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
964
- Returns an empty list if current object is not a classifier or not yet fitted.
984
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
965
985
  """
966
986
  if getattr(self._sklearn_object, "classes_", None) is None:
967
- return []
987
+ return [output_cols_prefix]
968
988
 
969
989
  classes = self._sklearn_object.classes_
970
990
  if isinstance(classes, numpy.ndarray):
@@ -1189,7 +1209,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
1189
1209
  cp.dump(self._sklearn_object, local_score_file)
1190
1210
 
1191
1211
  # Create temp stage to run score.
1192
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1212
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1193
1213
  session = dataset._session
1194
1214
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1195
1215
  SqlResultValidator(
@@ -1203,8 +1223,9 @@ class HistGradientBoostingRegressor(BaseTransformer):
1203
1223
  expected_value=f"Stage area {score_stage_name} successfully created."
1204
1224
  ).validate()
1205
1225
 
1206
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1207
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1226
+ # Use posixpath to construct stage paths
1227
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1228
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1208
1229
  statement_params = telemetry.get_function_usage_statement_params(
1209
1230
  project=_PROJECT,
1210
1231
  subproject=_SUBPROJECT,
@@ -1230,6 +1251,7 @@ class HistGradientBoostingRegressor(BaseTransformer):
1230
1251
  replace=True,
1231
1252
  session=session,
1232
1253
  statement_params=statement_params,
1254
+ anonymous=True
1233
1255
  )
1234
1256
  def score_wrapper_sproc(
1235
1257
  session: Session,
@@ -1237,7 +1259,8 @@ class HistGradientBoostingRegressor(BaseTransformer):
1237
1259
  stage_score_file_name: str,
1238
1260
  input_cols: List[str],
1239
1261
  label_cols: List[str],
1240
- sample_weight_col: Optional[str]
1262
+ sample_weight_col: Optional[str],
1263
+ statement_params: Dict[str, str]
1241
1264
  ) -> float:
1242
1265
  import cloudpickle as cp
1243
1266
  import numpy as np
@@ -1287,14 +1310,14 @@ class HistGradientBoostingRegressor(BaseTransformer):
1287
1310
  api_calls=[Session.call],
1288
1311
  custom_tags=dict([("autogen", True)]),
1289
1312
  )
1290
- score = session.call(
1291
- score_sproc_name,
1313
+ score = score_wrapper_sproc(
1314
+ session,
1292
1315
  query,
1293
1316
  stage_score_file_name,
1294
1317
  identifier.get_unescaped_names(self.input_cols),
1295
1318
  identifier.get_unescaped_names(self.label_cols),
1296
1319
  identifier.get_unescaped_names(self.sample_weight_col),
1297
- statement_params=statement_params,
1320
+ statement_params,
1298
1321
  )
1299
1322
 
1300
1323
  cleanup_temp_files([local_score_file_name])
@@ -1312,18 +1335,20 @@ class HistGradientBoostingRegressor(BaseTransformer):
1312
1335
  if self._sklearn_object._estimator_type == 'classifier':
1313
1336
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1314
1337
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1315
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1338
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1339
+ ([] if self._drop_input_cols else inputs) + outputs)
1316
1340
  # For regressor, the type of predict is float64
1317
1341
  elif self._sklearn_object._estimator_type == 'regressor':
1318
1342
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1319
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1320
-
1343
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1344
+ ([] if self._drop_input_cols else inputs) + outputs)
1321
1345
  for prob_func in PROB_FUNCTIONS:
1322
1346
  if hasattr(self, prob_func):
1323
1347
  output_cols_prefix: str = f"{prob_func}_"
1324
1348
  output_column_names = self._get_output_column_names(output_cols_prefix)
1325
1349
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1326
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1350
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1351
+ ([] if self._drop_input_cols else inputs) + outputs)
1327
1352
 
1328
1353
  @property
1329
1354
  def model_signatures(self) -> Dict[str, ModelSignature]: