snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -26,6 +27,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -390,7 +392,6 @@ class XGBRegressor(BaseTransformer):
         **kwargs,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -411,6 +412,15 @@ class XGBRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
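
Aside: the `_get_rand_id` helper added above turns a UUID into an identifier-safe token. A minimal standalone sketch of the same pattern (the `make_safe_id` name is illustrative, not part of the package):

from uuid import uuid4

def make_safe_id() -> str:
    # str(uuid4()) contains hyphens, which are not valid in unquoted Snowflake
    # identifiers; replace them with underscores and upper-case the result.
    return str(uuid4()).replace("-", "_").upper()

stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=make_safe_id())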
@@ -489,7 +499,7 @@ class XGBRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -502,11 +512,12 @@ class XGBRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
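
The switch from `os.path.join` to `posixpath.join` above matters because Snowflake stage paths always use forward slashes, while `os.path.join` would emit backslash-separated paths on a Windows client. A standalone illustration, not part of the package:

import ntpath      # the os.path implementation used on Windows
import posixpath   # always joins with forward slashes

stage = "SNOWML_TRANSFORM_ABC123"
file_name = "model.pkl.gz"

ntpath.join(stage, file_name)     # 'SNOWML_TRANSFORM_ABC123\\model.pkl.gz' -> not a valid stage path
posixpath.join(stage, file_name)  # 'SNOWML_TRANSFORM_ABC123/model.pkl.gz'  -> what the stage expects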
@@ -532,6 +543,7 @@ class XGBRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -540,7 +552,8 @@ class XGBRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -607,15 +620,15 @@ class XGBRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
            identifier.get_unescaped_names(self.input_cols),
            identifier.get_unescaped_names(self.label_cols),
            identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -625,7 +638,7 @@ class XGBRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -671,7 +684,7 @@ class XGBRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -763,7 +776,7 @@ class XGBRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -930,11 +943,18 @@ class XGBRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
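
For classifiers (see the XGBRFClassifier hunks below, where `expected_type_inferred` starts as an empty string), the predicted column's SQL type is derived from the model signature via the internal Snowpark helper `convert_sp_to_sf_type`, which maps a Snowpark DataType to its SQL type name. A small hedged illustration; the exact strings returned are an implementation detail of snowpark-python:

from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
from snowflake.snowpark.types import DoubleType, LongType, StringType

# Map Snowpark column types to the SQL type name used for the output column.
convert_sp_to_sf_type(DoubleType())  # e.g. 'DOUBLE'
convert_sp_to_sf_type(LongType())    # e.g. 'BIGINT'
convert_sp_to_sf_type(StringType())  # e.g. 'STRING'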
@@ -1005,10 +1025,10 @@ class XGBRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
        """
        if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
        classes = self._sklearn_object.classes_
        if isinstance(classes, numpy.ndarray):
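
In effect, when the fitted estimator exposes no `classes_` attribute, the prefix itself becomes the single output column name rather than an empty list. A simplified, hedged sketch of that behavior (the per-class naming shown for classifiers is illustrative of the idea, not copied from the package):

from typing import List

def output_column_names(sklearn_object, output_cols_prefix: str) -> List[str]:
    classes = getattr(sklearn_object, "classes_", None)
    if classes is None:
        # not a classifier (or not yet fitted): fall back to a single prefixed column
        return [output_cols_prefix]
    # classifier: one column per class label
    return [f"{output_cols_prefix}{c}" for c in classes]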
@@ -1233,7 +1253,7 @@ class XGBRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1247,8 +1267,9 @@ class XGBRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1274,6 +1295,7 @@ class XGBRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1281,7 +1303,8 @@ class XGBRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1331,14 +1354,14 @@ class XGBRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
            identifier.get_unescaped_names(self.input_cols),
            identifier.get_unescaped_names(self.label_cols),
            identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1356,18 +1379,20 @@ class XGBRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
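
The signature change above keeps the input features in the declared outputs unless `drop_input_cols` is set, reflecting that batch inference returns the input columns alongside the predictions. A small hedged sketch using the public `model_signature` types (column names are illustrative):

from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

inputs = [
    FeatureSpec(dtype=DataType.DOUBLE, name="SEPAL_LENGTH"),
    FeatureSpec(dtype=DataType.DOUBLE, name="SEPAL_WIDTH"),
]
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="PREDICTED_TARGET")]
drop_input_cols = False

# Inputs are prepended to the outputs unless the estimator drops its input columns.
signature = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)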
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -26,6 +27,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -392,7 +394,6 @@ class XGBRFClassifier(BaseTransformer):
         **kwargs,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -416,6 +417,15 @@ class XGBRFClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -494,7 +504,7 @@ class XGBRFClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -507,11 +517,12 @@ class XGBRFClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -537,6 +548,7 @@ class XGBRFClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -545,7 +557,8 @@ class XGBRFClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -612,15 +625,15 @@ class XGBRFClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
            identifier.get_unescaped_names(self.input_cols),
            identifier.get_unescaped_names(self.label_cols),
            identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -630,7 +643,7 @@ class XGBRFClassifier(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -676,7 +689,7 @@ class XGBRFClassifier(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -768,7 +781,7 @@ class XGBRFClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
        )
 
        pass_through_columns = self._get_pass_through_columns(dataset)
@@ -935,11 +948,18 @@ class XGBRFClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -1010,10 +1030,10 @@ class XGBRFClassifier(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
        """
        if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
        classes = self._sklearn_object.classes_
        if isinstance(classes, numpy.ndarray):
@@ -1242,7 +1262,7 @@ class XGBRFClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1256,8 +1276,9 @@ class XGBRFClassifier(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1283,6 +1304,7 @@ class XGBRFClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1290,7 +1312,8 @@ class XGBRFClassifier(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1340,14 +1363,14 @@ class XGBRFClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
            identifier.get_unescaped_names(self.input_cols),
            identifier.get_unescaped_names(self.label_cols),
            identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1365,18 +1388,20 @@ class XGBRFClassifier(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: