snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -240,7 +242,6 @@ class SVR(BaseTransformer):
240
242
  sample_weight_col: Optional[str] = None,
241
243
  ) -> None:
242
244
  super().__init__()
243
- self.id = str(uuid4()).replace("-", "_").upper()
244
245
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
245
246
 
246
247
  self._deps = list(deps)
@@ -270,6 +271,15 @@ class SVR(BaseTransformer):
270
271
  self.set_drop_input_cols(drop_input_cols)
271
272
  self.set_sample_weight_col(sample_weight_col)
272
273
 
274
+ def _get_rand_id(self) -> str:
275
+ """
276
+ Generate random id to be used in sproc and stage names.
277
+
278
+ Returns:
279
+ Random id string usable in sproc, table, and stage names.
280
+ """
281
+ return str(uuid4()).replace("-", "_").upper()
282
+
273
283
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
274
284
  """
275
285
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -348,7 +358,7 @@ class SVR(BaseTransformer):
348
358
  cp.dump(self._sklearn_object, local_transform_file)
349
359
 
350
360
  # Create temp stage to run fit.
351
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
361
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
352
362
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
353
363
  SqlResultValidator(
354
364
  session=session,
@@ -361,11 +371,12 @@ class SVR(BaseTransformer):
361
371
  expected_value=f"Stage area {transform_stage_name} successfully created."
362
372
  ).validate()
363
373
 
364
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
374
+ # Use posixpath to construct stage paths
375
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
376
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
365
377
  local_result_file_name = get_temp_file_path()
366
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
367
378
 
368
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
379
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
369
380
  statement_params = telemetry.get_function_usage_statement_params(
370
381
  project=_PROJECT,
371
382
  subproject=_SUBPROJECT,
@@ -391,6 +402,7 @@ class SVR(BaseTransformer):
391
402
  replace=True,
392
403
  session=session,
393
404
  statement_params=statement_params,
405
+ anonymous=True
394
406
  )
395
407
  def fit_wrapper_sproc(
396
408
  session: Session,
@@ -399,7 +411,8 @@ class SVR(BaseTransformer):
399
411
  stage_result_file_name: str,
400
412
  input_cols: List[str],
401
413
  label_cols: List[str],
402
- sample_weight_col: Optional[str]
414
+ sample_weight_col: Optional[str],
415
+ statement_params: Dict[str, str]
403
416
  ) -> str:
404
417
  import cloudpickle as cp
405
418
  import numpy as np
@@ -466,15 +479,15 @@ class SVR(BaseTransformer):
466
479
  api_calls=[Session.call],
467
480
  custom_tags=dict([("autogen", True)]),
468
481
  )
469
- sproc_export_file_name = session.call(
470
- fit_sproc_name,
482
+ sproc_export_file_name = fit_wrapper_sproc(
483
+ session,
471
484
  query,
472
485
  stage_transform_file_name,
473
486
  stage_result_file_name,
474
487
  identifier.get_unescaped_names(self.input_cols),
475
488
  identifier.get_unescaped_names(self.label_cols),
476
489
  identifier.get_unescaped_names(self.sample_weight_col),
477
- statement_params=statement_params,
490
+ statement_params,
478
491
  )
479
492
 
480
493
  if "|" in sproc_export_file_name:
@@ -484,7 +497,7 @@ class SVR(BaseTransformer):
484
497
  print("\n".join(fields[1:]))
485
498
 
486
499
  session.file.get(
487
- os.path.join(stage_result_file_name, sproc_export_file_name),
500
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
488
501
  local_result_file_name,
489
502
  statement_params=statement_params
490
503
  )
@@ -530,7 +543,7 @@ class SVR(BaseTransformer):
530
543
 
531
544
  # Register vectorized UDF for batch inference
532
545
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
533
- safe_id=self.id, method=inference_method)
546
+ safe_id=self._get_rand_id(), method=inference_method)
534
547
 
535
548
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
536
549
  # will try to pickle all of self which fails.
@@ -622,7 +635,7 @@ class SVR(BaseTransformer):
622
635
  return transformed_pandas_df.to_dict("records")
623
636
 
624
637
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
625
- safe_id=self.id
638
+ safe_id=self._get_rand_id()
626
639
  )
627
640
 
628
641
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -789,11 +802,18 @@ class SVR(BaseTransformer):
789
802
  Transformed dataset.
790
803
  """
791
804
  if isinstance(dataset, DataFrame):
805
+ expected_type_inferred = "float"
806
+ # when it is classifier, infer the datatype from label columns
807
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
808
+ expected_type_inferred = convert_sp_to_sf_type(
809
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
810
+ )
811
+
792
812
  output_df = self._batch_inference(
793
813
  dataset=dataset,
794
814
  inference_method="predict",
795
815
  expected_output_cols_list=self.output_cols,
796
- expected_output_cols_type="float",
816
+ expected_output_cols_type=expected_type_inferred,
797
817
  )
798
818
  elif isinstance(dataset, pd.DataFrame):
799
819
  output_df = self._sklearn_inference(
@@ -864,10 +884,10 @@ class SVR(BaseTransformer):
864
884
 
865
885
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
866
886
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
867
- Returns an empty list if current object is not a classifier or not yet fitted.
887
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
868
888
  """
869
889
  if getattr(self._sklearn_object, "classes_", None) is None:
870
- return []
890
+ return [output_cols_prefix]
871
891
 
872
892
  classes = self._sklearn_object.classes_
873
893
  if isinstance(classes, numpy.ndarray):
@@ -1092,7 +1112,7 @@ class SVR(BaseTransformer):
1092
1112
  cp.dump(self._sklearn_object, local_score_file)
1093
1113
 
1094
1114
  # Create temp stage to run score.
1095
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1115
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1096
1116
  session = dataset._session
1097
1117
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1098
1118
  SqlResultValidator(
@@ -1106,8 +1126,9 @@ class SVR(BaseTransformer):
1106
1126
  expected_value=f"Stage area {score_stage_name} successfully created."
1107
1127
  ).validate()
1108
1128
 
1109
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1110
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1129
+ # Use posixpath to construct stage paths
1130
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1131
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1111
1132
  statement_params = telemetry.get_function_usage_statement_params(
1112
1133
  project=_PROJECT,
1113
1134
  subproject=_SUBPROJECT,
@@ -1133,6 +1154,7 @@ class SVR(BaseTransformer):
1133
1154
  replace=True,
1134
1155
  session=session,
1135
1156
  statement_params=statement_params,
1157
+ anonymous=True
1136
1158
  )
1137
1159
  def score_wrapper_sproc(
1138
1160
  session: Session,
@@ -1140,7 +1162,8 @@ class SVR(BaseTransformer):
1140
1162
  stage_score_file_name: str,
1141
1163
  input_cols: List[str],
1142
1164
  label_cols: List[str],
1143
- sample_weight_col: Optional[str]
1165
+ sample_weight_col: Optional[str],
1166
+ statement_params: Dict[str, str]
1144
1167
  ) -> float:
1145
1168
  import cloudpickle as cp
1146
1169
  import numpy as np
@@ -1190,14 +1213,14 @@ class SVR(BaseTransformer):
1190
1213
  api_calls=[Session.call],
1191
1214
  custom_tags=dict([("autogen", True)]),
1192
1215
  )
1193
- score = session.call(
1194
- score_sproc_name,
1216
+ score = score_wrapper_sproc(
1217
+ session,
1195
1218
  query,
1196
1219
  stage_score_file_name,
1197
1220
  identifier.get_unescaped_names(self.input_cols),
1198
1221
  identifier.get_unescaped_names(self.label_cols),
1199
1222
  identifier.get_unescaped_names(self.sample_weight_col),
1200
- statement_params=statement_params,
1223
+ statement_params,
1201
1224
  )
1202
1225
 
1203
1226
  cleanup_temp_files([local_score_file_name])
@@ -1215,18 +1238,20 @@ class SVR(BaseTransformer):
1215
1238
  if self._sklearn_object._estimator_type == 'classifier':
1216
1239
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1217
1240
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1218
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1241
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1242
+ ([] if self._drop_input_cols else inputs) + outputs)
1219
1243
  # For regressor, the type of predict is float64
1220
1244
  elif self._sklearn_object._estimator_type == 'regressor':
1221
1245
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1222
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1223
-
1246
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1247
+ ([] if self._drop_input_cols else inputs) + outputs)
1224
1248
  for prob_func in PROB_FUNCTIONS:
1225
1249
  if hasattr(self, prob_func):
1226
1250
  output_cols_prefix: str = f"{prob_func}_"
1227
1251
  output_column_names = self._get_output_column_names(output_cols_prefix)
1228
1252
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1229
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1253
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1254
+ ([] if self._drop_input_cols else inputs) + outputs)
1230
1255
 
1231
1256
  @property
1232
1257
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -307,7 +309,6 @@ class DecisionTreeClassifier(BaseTransformer):
307
309
  sample_weight_col: Optional[str] = None,
308
310
  ) -> None:
309
311
  super().__init__()
310
- self.id = str(uuid4()).replace("-", "_").upper()
311
312
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
312
313
 
313
314
  self._deps = list(deps)
@@ -338,6 +339,15 @@ class DecisionTreeClassifier(BaseTransformer):
338
339
  self.set_drop_input_cols(drop_input_cols)
339
340
  self.set_sample_weight_col(sample_weight_col)
340
341
 
342
+ def _get_rand_id(self) -> str:
343
+ """
344
+ Generate random id to be used in sproc and stage names.
345
+
346
+ Returns:
347
+ Random id string usable in sproc, table, and stage names.
348
+ """
349
+ return str(uuid4()).replace("-", "_").upper()
350
+
341
351
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
342
352
  """
343
353
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -416,7 +426,7 @@ class DecisionTreeClassifier(BaseTransformer):
416
426
  cp.dump(self._sklearn_object, local_transform_file)
417
427
 
418
428
  # Create temp stage to run fit.
419
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
429
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
420
430
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
421
431
  SqlResultValidator(
422
432
  session=session,
@@ -429,11 +439,12 @@ class DecisionTreeClassifier(BaseTransformer):
429
439
  expected_value=f"Stage area {transform_stage_name} successfully created."
430
440
  ).validate()
431
441
 
432
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
442
+ # Use posixpath to construct stage paths
443
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
444
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
433
445
  local_result_file_name = get_temp_file_path()
434
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
435
446
 
436
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
447
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
437
448
  statement_params = telemetry.get_function_usage_statement_params(
438
449
  project=_PROJECT,
439
450
  subproject=_SUBPROJECT,
@@ -459,6 +470,7 @@ class DecisionTreeClassifier(BaseTransformer):
459
470
  replace=True,
460
471
  session=session,
461
472
  statement_params=statement_params,
473
+ anonymous=True
462
474
  )
463
475
  def fit_wrapper_sproc(
464
476
  session: Session,
@@ -467,7 +479,8 @@ class DecisionTreeClassifier(BaseTransformer):
467
479
  stage_result_file_name: str,
468
480
  input_cols: List[str],
469
481
  label_cols: List[str],
470
- sample_weight_col: Optional[str]
482
+ sample_weight_col: Optional[str],
483
+ statement_params: Dict[str, str]
471
484
  ) -> str:
472
485
  import cloudpickle as cp
473
486
  import numpy as np
@@ -534,15 +547,15 @@ class DecisionTreeClassifier(BaseTransformer):
534
547
  api_calls=[Session.call],
535
548
  custom_tags=dict([("autogen", True)]),
536
549
  )
537
- sproc_export_file_name = session.call(
538
- fit_sproc_name,
550
+ sproc_export_file_name = fit_wrapper_sproc(
551
+ session,
539
552
  query,
540
553
  stage_transform_file_name,
541
554
  stage_result_file_name,
542
555
  identifier.get_unescaped_names(self.input_cols),
543
556
  identifier.get_unescaped_names(self.label_cols),
544
557
  identifier.get_unescaped_names(self.sample_weight_col),
545
- statement_params=statement_params,
558
+ statement_params,
546
559
  )
547
560
 
548
561
  if "|" in sproc_export_file_name:
@@ -552,7 +565,7 @@ class DecisionTreeClassifier(BaseTransformer):
552
565
  print("\n".join(fields[1:]))
553
566
 
554
567
  session.file.get(
555
- os.path.join(stage_result_file_name, sproc_export_file_name),
568
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
556
569
  local_result_file_name,
557
570
  statement_params=statement_params
558
571
  )
@@ -598,7 +611,7 @@ class DecisionTreeClassifier(BaseTransformer):
598
611
 
599
612
  # Register vectorized UDF for batch inference
600
613
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
601
- safe_id=self.id, method=inference_method)
614
+ safe_id=self._get_rand_id(), method=inference_method)
602
615
 
603
616
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
604
617
  # will try to pickle all of self which fails.
@@ -690,7 +703,7 @@ class DecisionTreeClassifier(BaseTransformer):
690
703
  return transformed_pandas_df.to_dict("records")
691
704
 
692
705
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
693
- safe_id=self.id
706
+ safe_id=self._get_rand_id()
694
707
  )
695
708
 
696
709
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -857,11 +870,18 @@ class DecisionTreeClassifier(BaseTransformer):
857
870
  Transformed dataset.
858
871
  """
859
872
  if isinstance(dataset, DataFrame):
873
+ expected_type_inferred = ""
874
+ # when it is classifier, infer the datatype from label columns
875
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
876
+ expected_type_inferred = convert_sp_to_sf_type(
877
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
878
+ )
879
+
860
880
  output_df = self._batch_inference(
861
881
  dataset=dataset,
862
882
  inference_method="predict",
863
883
  expected_output_cols_list=self.output_cols,
864
- expected_output_cols_type="",
884
+ expected_output_cols_type=expected_type_inferred,
865
885
  )
866
886
  elif isinstance(dataset, pd.DataFrame):
867
887
  output_df = self._sklearn_inference(
@@ -932,10 +952,10 @@ class DecisionTreeClassifier(BaseTransformer):
932
952
 
933
953
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
934
954
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
935
- Returns an empty list if current object is not a classifier or not yet fitted.
955
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
936
956
  """
937
957
  if getattr(self._sklearn_object, "classes_", None) is None:
938
- return []
958
+ return [output_cols_prefix]
939
959
 
940
960
  classes = self._sklearn_object.classes_
941
961
  if isinstance(classes, numpy.ndarray):
@@ -1164,7 +1184,7 @@ class DecisionTreeClassifier(BaseTransformer):
1164
1184
  cp.dump(self._sklearn_object, local_score_file)
1165
1185
 
1166
1186
  # Create temp stage to run score.
1167
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1187
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1168
1188
  session = dataset._session
1169
1189
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1170
1190
  SqlResultValidator(
@@ -1178,8 +1198,9 @@ class DecisionTreeClassifier(BaseTransformer):
1178
1198
  expected_value=f"Stage area {score_stage_name} successfully created."
1179
1199
  ).validate()
1180
1200
 
1181
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1182
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1201
+ # Use posixpath to construct stage paths
1202
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1203
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1183
1204
  statement_params = telemetry.get_function_usage_statement_params(
1184
1205
  project=_PROJECT,
1185
1206
  subproject=_SUBPROJECT,
@@ -1205,6 +1226,7 @@ class DecisionTreeClassifier(BaseTransformer):
1205
1226
  replace=True,
1206
1227
  session=session,
1207
1228
  statement_params=statement_params,
1229
+ anonymous=True
1208
1230
  )
1209
1231
  def score_wrapper_sproc(
1210
1232
  session: Session,
@@ -1212,7 +1234,8 @@ class DecisionTreeClassifier(BaseTransformer):
1212
1234
  stage_score_file_name: str,
1213
1235
  input_cols: List[str],
1214
1236
  label_cols: List[str],
1215
- sample_weight_col: Optional[str]
1237
+ sample_weight_col: Optional[str],
1238
+ statement_params: Dict[str, str]
1216
1239
  ) -> float:
1217
1240
  import cloudpickle as cp
1218
1241
  import numpy as np
@@ -1262,14 +1285,14 @@ class DecisionTreeClassifier(BaseTransformer):
1262
1285
  api_calls=[Session.call],
1263
1286
  custom_tags=dict([("autogen", True)]),
1264
1287
  )
1265
- score = session.call(
1266
- score_sproc_name,
1288
+ score = score_wrapper_sproc(
1289
+ session,
1267
1290
  query,
1268
1291
  stage_score_file_name,
1269
1292
  identifier.get_unescaped_names(self.input_cols),
1270
1293
  identifier.get_unescaped_names(self.label_cols),
1271
1294
  identifier.get_unescaped_names(self.sample_weight_col),
1272
- statement_params=statement_params,
1295
+ statement_params,
1273
1296
  )
1274
1297
 
1275
1298
  cleanup_temp_files([local_score_file_name])
@@ -1287,18 +1310,20 @@ class DecisionTreeClassifier(BaseTransformer):
1287
1310
  if self._sklearn_object._estimator_type == 'classifier':
1288
1311
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1289
1312
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1290
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1313
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1314
+ ([] if self._drop_input_cols else inputs) + outputs)
1291
1315
  # For regressor, the type of predict is float64
1292
1316
  elif self._sklearn_object._estimator_type == 'regressor':
1293
1317
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1294
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1295
-
1318
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1319
+ ([] if self._drop_input_cols else inputs) + outputs)
1296
1320
  for prob_func in PROB_FUNCTIONS:
1297
1321
  if hasattr(self, prob_func):
1298
1322
  output_cols_prefix: str = f"{prob_func}_"
1299
1323
  output_column_names = self._get_output_column_names(output_cols_prefix)
1300
1324
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1301
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1325
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1326
+ ([] if self._drop_input_cols else inputs) + outputs)
1302
1327
 
1303
1328
  @property
1304
1329
  def model_signatures(self) -> Dict[str, ModelSignature]: