snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/tree/extra_tree_regressor.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -282,7 +284,6 @@ class ExtraTreeRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -312,6 +313,15 @@ class ExtraTreeRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -390,7 +400,7 @@ class ExtraTreeRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -403,11 +413,12 @@ class ExtraTreeRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -433,6 +444,7 @@ class ExtraTreeRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -441,7 +453,8 @@ class ExtraTreeRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -508,15 +521,15 @@ class ExtraTreeRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -526,7 +539,7 @@ class ExtraTreeRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -572,7 +585,7 @@ class ExtraTreeRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -664,7 +677,7 @@ class ExtraTreeRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -831,11 +844,18 @@ class ExtraTreeRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -906,10 +926,10 @@ class ExtraTreeRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
        """
        if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
        classes = self._sklearn_object.classes_
        if isinstance(classes, numpy.ndarray):
@@ -1134,7 +1154,7 @@ class ExtraTreeRegressor(BaseTransformer):
            cp.dump(self._sklearn_object, local_score_file)
 
        # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
        session = dataset._session
        stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
        SqlResultValidator(
@@ -1148,8 +1168,9 @@ class ExtraTreeRegressor(BaseTransformer):
            expected_value=f"Stage area {score_stage_name} successfully created."
        ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
        statement_params = telemetry.get_function_usage_statement_params(
            project=_PROJECT,
            subproject=_SUBPROJECT,
@@ -1175,6 +1196,7 @@ class ExtraTreeRegressor(BaseTransformer):
            replace=True,
            session=session,
            statement_params=statement_params,
+            anonymous=True
        )
        def score_wrapper_sproc(
            session: Session,
@@ -1182,7 +1204,8 @@ class ExtraTreeRegressor(BaseTransformer):
            stage_score_file_name: str,
            input_cols: List[str],
            label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
        ) -> float:
            import cloudpickle as cp
            import numpy as np
@@ -1232,14 +1255,14 @@ class ExtraTreeRegressor(BaseTransformer):
            api_calls=[Session.call],
            custom_tags=dict([("autogen", True)]),
        )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
            query,
            stage_score_file_name,
            identifier.get_unescaped_names(self.input_cols),
            identifier.get_unescaped_names(self.label_cols),
            identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
        )
 
        cleanup_temp_files([local_score_file_name])
@@ -1257,18 +1280,20 @@ class ExtraTreeRegressor(BaseTransformer):
        if self._sklearn_object._estimator_type == 'classifier':
            outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
            outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
        # For regressor, the type of predict is float64
        elif self._sklearn_object._estimator_type == 'regressor':
            outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
        for prob_func in PROB_FUNCTIONS:
            if hasattr(self, prob_func):
                output_cols_prefix: str = f"{prob_func}_"
                output_column_names = self._get_output_column_names(output_cols_prefix)
                outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
                                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
    @property
    def model_signatures(self) -> Dict[str, ModelSignature]:
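
The hunks above replace os.path.join with posixpath.join when composing stage paths (the same change recurs in the next file). A minimal sketch of why that matters, using hypothetical stage and file names that are not taken from the package:

    import os.path
    import posixpath

    stage_name = "SNOWML_TRANSFORM_ABC123"   # hypothetical stage name
    file_name = "model.pkl.gz"               # hypothetical staged file name

    # os.path.join uses the client OS separator ('\\' on Windows), so on a
    # Windows client the result contains backslashes, which is not a valid
    # Snowflake stage path.
    client_dependent = os.path.join(stage_name, file_name)

    # posixpath.join always joins with '/', matching stage path syntax
    # regardless of the client operating system.
    stage_path = posixpath.join(stage_name, file_name)

    print(client_dependent)
    print(stage_path)  # SNOWML_TRANSFORM_ABC123/model.pkl.gz
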
snowflake/ml/modeling/xgboost/xgb_classifier.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -26,6 +27,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -390,7 +392,6 @@ class XGBClassifier(BaseTransformer):
         **kwargs,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'xgboost=={xgboost.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -412,6 +413,15 @@ class XGBClassifier(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -490,7 +500,7 @@ class XGBClassifier(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -503,11 +513,12 @@ class XGBClassifier(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -533,6 +544,7 @@ class XGBClassifier(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -541,7 +553,8 @@ class XGBClassifier(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -608,15 +621,15 @@ class XGBClassifier(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -626,7 +639,7 @@ class XGBClassifier(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -672,7 +685,7 @@ class XGBClassifier(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -764,7 +777,7 @@ class XGBClassifier(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -931,11 +944,18 @@ class XGBClassifier(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -1006,10 +1026,10 @@ class XGBClassifier(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
        """
        if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
        classes = self._sklearn_object.classes_
        if isinstance(classes, numpy.ndarray):
@@ -1238,7 +1258,7 @@ class XGBClassifier(BaseTransformer):
            cp.dump(self._sklearn_object, local_score_file)
 
        # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
        session = dataset._session
        stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
        SqlResultValidator(
@@ -1252,8 +1272,9 @@ class XGBClassifier(BaseTransformer):
            expected_value=f"Stage area {score_stage_name} successfully created."
        ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
        statement_params = telemetry.get_function_usage_statement_params(
            project=_PROJECT,
            subproject=_SUBPROJECT,
@@ -1279,6 +1300,7 @@ class XGBClassifier(BaseTransformer):
            replace=True,
            session=session,
            statement_params=statement_params,
+            anonymous=True
        )
        def score_wrapper_sproc(
            session: Session,
@@ -1286,7 +1308,8 @@ class XGBClassifier(BaseTransformer):
            stage_score_file_name: str,
            input_cols: List[str],
            label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
        ) -> float:
            import cloudpickle as cp
            import numpy as np
@@ -1336,14 +1359,14 @@ class XGBClassifier(BaseTransformer):
            api_calls=[Session.call],
            custom_tags=dict([("autogen", True)]),
        )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
            query,
            stage_score_file_name,
            identifier.get_unescaped_names(self.input_cols),
            identifier.get_unescaped_names(self.label_cols),
            identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
        )
 
        cleanup_temp_files([local_score_file_name])
@@ -1361,18 +1384,20 @@ class XGBClassifier(BaseTransformer):
        if self._sklearn_object._estimator_type == 'classifier':
            outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
            outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
        # For regressor, the type of predict is float64
        elif self._sklearn_object._estimator_type == 'regressor':
            outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
        for prob_func in PROB_FUNCTIONS:
            if hasattr(self, prob_func):
                output_cols_prefix: str = f"{prob_func}_"
                output_column_names = self._get_output_column_names(output_cols_prefix)
                outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
                                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
    @property
    def model_signatures(self) -> Dict[str, ModelSignature]:
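
Across both files, the fixed instance attribute self.id is replaced by a per-call _get_rand_id() helper. A self-contained sketch, reproducing the helper from the diff above, shows that each call yields a distinct identifier (presumably so repeated fit/score calls on one estimator no longer reuse the same stage and sproc names):

    from uuid import uuid4

    def _get_rand_id() -> str:
        """Generate a random id usable in sproc, table, and stage names."""
        return str(uuid4()).replace("-", "_").upper()

    # Each call returns a new identifier, unlike the removed self.id,
    # which was fixed for the lifetime of the estimator instance.
    print("SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id()))
    print("SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id()))
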