snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -290,7 +292,6 @@ class DecisionTreeRegressor(BaseTransformer):
290
292
  sample_weight_col: Optional[str] = None,
291
293
  ) -> None:
292
294
  super().__init__()
293
- self.id = str(uuid4()).replace("-", "_").upper()
294
295
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
295
296
 
296
297
  self._deps = list(deps)
@@ -320,6 +321,15 @@ class DecisionTreeRegressor(BaseTransformer):
320
321
  self.set_drop_input_cols(drop_input_cols)
321
322
  self.set_sample_weight_col(sample_weight_col)
322
323
 
324
+ def _get_rand_id(self) -> str:
325
+ """
326
+ Generate random id to be used in sproc and stage names.
327
+
328
+ Returns:
329
+ Random id string usable in sproc, table, and stage names.
330
+ """
331
+ return str(uuid4()).replace("-", "_").upper()
332
+
323
333
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
324
334
  """
325
335
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -398,7 +408,7 @@ class DecisionTreeRegressor(BaseTransformer):
398
408
  cp.dump(self._sklearn_object, local_transform_file)
399
409
 
400
410
  # Create temp stage to run fit.
401
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
411
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
402
412
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
403
413
  SqlResultValidator(
404
414
  session=session,
@@ -411,11 +421,12 @@ class DecisionTreeRegressor(BaseTransformer):
411
421
  expected_value=f"Stage area {transform_stage_name} successfully created."
412
422
  ).validate()
413
423
 
414
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
424
+ # Use posixpath to construct stage paths
425
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
426
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
415
427
  local_result_file_name = get_temp_file_path()
416
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
417
428
 
418
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
429
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
419
430
  statement_params = telemetry.get_function_usage_statement_params(
420
431
  project=_PROJECT,
421
432
  subproject=_SUBPROJECT,
@@ -441,6 +452,7 @@ class DecisionTreeRegressor(BaseTransformer):
441
452
  replace=True,
442
453
  session=session,
443
454
  statement_params=statement_params,
455
+ anonymous=True
444
456
  )
445
457
  def fit_wrapper_sproc(
446
458
  session: Session,
@@ -449,7 +461,8 @@ class DecisionTreeRegressor(BaseTransformer):
449
461
  stage_result_file_name: str,
450
462
  input_cols: List[str],
451
463
  label_cols: List[str],
452
- sample_weight_col: Optional[str]
464
+ sample_weight_col: Optional[str],
465
+ statement_params: Dict[str, str]
453
466
  ) -> str:
454
467
  import cloudpickle as cp
455
468
  import numpy as np
@@ -516,15 +529,15 @@ class DecisionTreeRegressor(BaseTransformer):
516
529
  api_calls=[Session.call],
517
530
  custom_tags=dict([("autogen", True)]),
518
531
  )
519
- sproc_export_file_name = session.call(
520
- fit_sproc_name,
532
+ sproc_export_file_name = fit_wrapper_sproc(
533
+ session,
521
534
  query,
522
535
  stage_transform_file_name,
523
536
  stage_result_file_name,
524
537
  identifier.get_unescaped_names(self.input_cols),
525
538
  identifier.get_unescaped_names(self.label_cols),
526
539
  identifier.get_unescaped_names(self.sample_weight_col),
527
- statement_params=statement_params,
540
+ statement_params,
528
541
  )
529
542
 
530
543
  if "|" in sproc_export_file_name:
@@ -534,7 +547,7 @@ class DecisionTreeRegressor(BaseTransformer):
534
547
  print("\n".join(fields[1:]))
535
548
 
536
549
  session.file.get(
537
- os.path.join(stage_result_file_name, sproc_export_file_name),
550
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
538
551
  local_result_file_name,
539
552
  statement_params=statement_params
540
553
  )
@@ -580,7 +593,7 @@ class DecisionTreeRegressor(BaseTransformer):
580
593
 
581
594
  # Register vectorized UDF for batch inference
582
595
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
583
- safe_id=self.id, method=inference_method)
596
+ safe_id=self._get_rand_id(), method=inference_method)
584
597
 
585
598
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
586
599
  # will try to pickle all of self which fails.
@@ -672,7 +685,7 @@ class DecisionTreeRegressor(BaseTransformer):
672
685
  return transformed_pandas_df.to_dict("records")
673
686
 
674
687
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
675
- safe_id=self.id
688
+ safe_id=self._get_rand_id()
676
689
  )
677
690
 
678
691
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -839,11 +852,18 @@ class DecisionTreeRegressor(BaseTransformer):
839
852
  Transformed dataset.
840
853
  """
841
854
  if isinstance(dataset, DataFrame):
855
+ expected_type_inferred = "float"
856
+ # when it is classifier, infer the datatype from label columns
857
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
858
+ expected_type_inferred = convert_sp_to_sf_type(
859
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
860
+ )
861
+
842
862
  output_df = self._batch_inference(
843
863
  dataset=dataset,
844
864
  inference_method="predict",
845
865
  expected_output_cols_list=self.output_cols,
846
- expected_output_cols_type="float",
866
+ expected_output_cols_type=expected_type_inferred,
847
867
  )
848
868
  elif isinstance(dataset, pd.DataFrame):
849
869
  output_df = self._sklearn_inference(
@@ -914,10 +934,10 @@ class DecisionTreeRegressor(BaseTransformer):
914
934
 
915
935
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
916
936
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
917
- Returns an empty list if current object is not a classifier or not yet fitted.
937
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
918
938
  """
919
939
  if getattr(self._sklearn_object, "classes_", None) is None:
920
- return []
940
+ return [output_cols_prefix]
921
941
 
922
942
  classes = self._sklearn_object.classes_
923
943
  if isinstance(classes, numpy.ndarray):
@@ -1142,7 +1162,7 @@ class DecisionTreeRegressor(BaseTransformer):
1142
1162
  cp.dump(self._sklearn_object, local_score_file)
1143
1163
 
1144
1164
  # Create temp stage to run score.
1145
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1165
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1146
1166
  session = dataset._session
1147
1167
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1148
1168
  SqlResultValidator(
@@ -1156,8 +1176,9 @@ class DecisionTreeRegressor(BaseTransformer):
1156
1176
  expected_value=f"Stage area {score_stage_name} successfully created."
1157
1177
  ).validate()
1158
1178
 
1159
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1160
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1179
+ # Use posixpath to construct stage paths
1180
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1181
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1161
1182
  statement_params = telemetry.get_function_usage_statement_params(
1162
1183
  project=_PROJECT,
1163
1184
  subproject=_SUBPROJECT,
@@ -1183,6 +1204,7 @@ class DecisionTreeRegressor(BaseTransformer):
1183
1204
  replace=True,
1184
1205
  session=session,
1185
1206
  statement_params=statement_params,
1207
+ anonymous=True
1186
1208
  )
1187
1209
  def score_wrapper_sproc(
1188
1210
  session: Session,
@@ -1190,7 +1212,8 @@ class DecisionTreeRegressor(BaseTransformer):
1190
1212
  stage_score_file_name: str,
1191
1213
  input_cols: List[str],
1192
1214
  label_cols: List[str],
1193
- sample_weight_col: Optional[str]
1215
+ sample_weight_col: Optional[str],
1216
+ statement_params: Dict[str, str]
1194
1217
  ) -> float:
1195
1218
  import cloudpickle as cp
1196
1219
  import numpy as np
@@ -1240,14 +1263,14 @@ class DecisionTreeRegressor(BaseTransformer):
1240
1263
  api_calls=[Session.call],
1241
1264
  custom_tags=dict([("autogen", True)]),
1242
1265
  )
1243
- score = session.call(
1244
- score_sproc_name,
1266
+ score = score_wrapper_sproc(
1267
+ session,
1245
1268
  query,
1246
1269
  stage_score_file_name,
1247
1270
  identifier.get_unescaped_names(self.input_cols),
1248
1271
  identifier.get_unescaped_names(self.label_cols),
1249
1272
  identifier.get_unescaped_names(self.sample_weight_col),
1250
- statement_params=statement_params,
1273
+ statement_params,
1251
1274
  )
1252
1275
 
1253
1276
  cleanup_temp_files([local_score_file_name])
@@ -1265,18 +1288,20 @@ class DecisionTreeRegressor(BaseTransformer):
1265
1288
  if self._sklearn_object._estimator_type == 'classifier':
1266
1289
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1267
1290
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1268
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1291
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1292
+ ([] if self._drop_input_cols else inputs) + outputs)
1269
1293
  # For regressor, the type of predict is float64
1270
1294
  elif self._sklearn_object._estimator_type == 'regressor':
1271
1295
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1272
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1273
-
1296
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1297
+ ([] if self._drop_input_cols else inputs) + outputs)
1274
1298
  for prob_func in PROB_FUNCTIONS:
1275
1299
  if hasattr(self, prob_func):
1276
1300
  output_cols_prefix: str = f"{prob_func}_"
1277
1301
  output_column_names = self._get_output_column_names(output_cols_prefix)
1278
1302
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1279
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1303
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1304
+ ([] if self._drop_input_cols else inputs) + outputs)
1280
1305
 
1281
1306
  @property
1282
1307
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -299,7 +301,6 @@ class ExtraTreeClassifier(BaseTransformer):
299
301
  sample_weight_col: Optional[str] = None,
300
302
  ) -> None:
301
303
  super().__init__()
302
- self.id = str(uuid4()).replace("-", "_").upper()
303
304
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
304
305
 
305
306
  self._deps = list(deps)
@@ -330,6 +331,15 @@ class ExtraTreeClassifier(BaseTransformer):
330
331
  self.set_drop_input_cols(drop_input_cols)
331
332
  self.set_sample_weight_col(sample_weight_col)
332
333
 
334
+ def _get_rand_id(self) -> str:
335
+ """
336
+ Generate random id to be used in sproc and stage names.
337
+
338
+ Returns:
339
+ Random id string usable in sproc, table, and stage names.
340
+ """
341
+ return str(uuid4()).replace("-", "_").upper()
342
+
333
343
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
334
344
  """
335
345
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -408,7 +418,7 @@ class ExtraTreeClassifier(BaseTransformer):
408
418
  cp.dump(self._sklearn_object, local_transform_file)
409
419
 
410
420
  # Create temp stage to run fit.
411
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
421
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
412
422
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
413
423
  SqlResultValidator(
414
424
  session=session,
@@ -421,11 +431,12 @@ class ExtraTreeClassifier(BaseTransformer):
421
431
  expected_value=f"Stage area {transform_stage_name} successfully created."
422
432
  ).validate()
423
433
 
424
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
434
+ # Use posixpath to construct stage paths
435
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
436
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
425
437
  local_result_file_name = get_temp_file_path()
426
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
427
438
 
428
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
439
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
429
440
  statement_params = telemetry.get_function_usage_statement_params(
430
441
  project=_PROJECT,
431
442
  subproject=_SUBPROJECT,
@@ -451,6 +462,7 @@ class ExtraTreeClassifier(BaseTransformer):
451
462
  replace=True,
452
463
  session=session,
453
464
  statement_params=statement_params,
465
+ anonymous=True
454
466
  )
455
467
  def fit_wrapper_sproc(
456
468
  session: Session,
@@ -459,7 +471,8 @@ class ExtraTreeClassifier(BaseTransformer):
459
471
  stage_result_file_name: str,
460
472
  input_cols: List[str],
461
473
  label_cols: List[str],
462
- sample_weight_col: Optional[str]
474
+ sample_weight_col: Optional[str],
475
+ statement_params: Dict[str, str]
463
476
  ) -> str:
464
477
  import cloudpickle as cp
465
478
  import numpy as np
@@ -526,15 +539,15 @@ class ExtraTreeClassifier(BaseTransformer):
526
539
  api_calls=[Session.call],
527
540
  custom_tags=dict([("autogen", True)]),
528
541
  )
529
- sproc_export_file_name = session.call(
530
- fit_sproc_name,
542
+ sproc_export_file_name = fit_wrapper_sproc(
543
+ session,
531
544
  query,
532
545
  stage_transform_file_name,
533
546
  stage_result_file_name,
534
547
  identifier.get_unescaped_names(self.input_cols),
535
548
  identifier.get_unescaped_names(self.label_cols),
536
549
  identifier.get_unescaped_names(self.sample_weight_col),
537
- statement_params=statement_params,
550
+ statement_params,
538
551
  )
539
552
 
540
553
  if "|" in sproc_export_file_name:
@@ -544,7 +557,7 @@ class ExtraTreeClassifier(BaseTransformer):
544
557
  print("\n".join(fields[1:]))
545
558
 
546
559
  session.file.get(
547
- os.path.join(stage_result_file_name, sproc_export_file_name),
560
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
548
561
  local_result_file_name,
549
562
  statement_params=statement_params
550
563
  )
@@ -590,7 +603,7 @@ class ExtraTreeClassifier(BaseTransformer):
590
603
 
591
604
  # Register vectorized UDF for batch inference
592
605
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
593
- safe_id=self.id, method=inference_method)
606
+ safe_id=self._get_rand_id(), method=inference_method)
594
607
 
595
608
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
596
609
  # will try to pickle all of self which fails.
@@ -682,7 +695,7 @@ class ExtraTreeClassifier(BaseTransformer):
682
695
  return transformed_pandas_df.to_dict("records")
683
696
 
684
697
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
685
- safe_id=self.id
698
+ safe_id=self._get_rand_id()
686
699
  )
687
700
 
688
701
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -849,11 +862,18 @@ class ExtraTreeClassifier(BaseTransformer):
849
862
  Transformed dataset.
850
863
  """
851
864
  if isinstance(dataset, DataFrame):
865
+ expected_type_inferred = ""
866
+ # when it is classifier, infer the datatype from label columns
867
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
868
+ expected_type_inferred = convert_sp_to_sf_type(
869
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
870
+ )
871
+
852
872
  output_df = self._batch_inference(
853
873
  dataset=dataset,
854
874
  inference_method="predict",
855
875
  expected_output_cols_list=self.output_cols,
856
- expected_output_cols_type="",
876
+ expected_output_cols_type=expected_type_inferred,
857
877
  )
858
878
  elif isinstance(dataset, pd.DataFrame):
859
879
  output_df = self._sklearn_inference(
@@ -924,10 +944,10 @@ class ExtraTreeClassifier(BaseTransformer):
924
944
 
925
945
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
926
946
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
927
- Returns an empty list if current object is not a classifier or not yet fitted.
947
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
928
948
  """
929
949
  if getattr(self._sklearn_object, "classes_", None) is None:
930
- return []
950
+ return [output_cols_prefix]
931
951
 
932
952
  classes = self._sklearn_object.classes_
933
953
  if isinstance(classes, numpy.ndarray):
@@ -1156,7 +1176,7 @@ class ExtraTreeClassifier(BaseTransformer):
1156
1176
  cp.dump(self._sklearn_object, local_score_file)
1157
1177
 
1158
1178
  # Create temp stage to run score.
1159
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1179
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1160
1180
  session = dataset._session
1161
1181
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1162
1182
  SqlResultValidator(
@@ -1170,8 +1190,9 @@ class ExtraTreeClassifier(BaseTransformer):
1170
1190
  expected_value=f"Stage area {score_stage_name} successfully created."
1171
1191
  ).validate()
1172
1192
 
1173
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1174
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1193
+ # Use posixpath to construct stage paths
1194
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1195
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1175
1196
  statement_params = telemetry.get_function_usage_statement_params(
1176
1197
  project=_PROJECT,
1177
1198
  subproject=_SUBPROJECT,
@@ -1197,6 +1218,7 @@ class ExtraTreeClassifier(BaseTransformer):
1197
1218
  replace=True,
1198
1219
  session=session,
1199
1220
  statement_params=statement_params,
1221
+ anonymous=True
1200
1222
  )
1201
1223
  def score_wrapper_sproc(
1202
1224
  session: Session,
@@ -1204,7 +1226,8 @@ class ExtraTreeClassifier(BaseTransformer):
1204
1226
  stage_score_file_name: str,
1205
1227
  input_cols: List[str],
1206
1228
  label_cols: List[str],
1207
- sample_weight_col: Optional[str]
1229
+ sample_weight_col: Optional[str],
1230
+ statement_params: Dict[str, str]
1208
1231
  ) -> float:
1209
1232
  import cloudpickle as cp
1210
1233
  import numpy as np
@@ -1254,14 +1277,14 @@ class ExtraTreeClassifier(BaseTransformer):
1254
1277
  api_calls=[Session.call],
1255
1278
  custom_tags=dict([("autogen", True)]),
1256
1279
  )
1257
- score = session.call(
1258
- score_sproc_name,
1280
+ score = score_wrapper_sproc(
1281
+ session,
1259
1282
  query,
1260
1283
  stage_score_file_name,
1261
1284
  identifier.get_unescaped_names(self.input_cols),
1262
1285
  identifier.get_unescaped_names(self.label_cols),
1263
1286
  identifier.get_unescaped_names(self.sample_weight_col),
1264
- statement_params=statement_params,
1287
+ statement_params,
1265
1288
  )
1266
1289
 
1267
1290
  cleanup_temp_files([local_score_file_name])
@@ -1279,18 +1302,20 @@ class ExtraTreeClassifier(BaseTransformer):
1279
1302
  if self._sklearn_object._estimator_type == 'classifier':
1280
1303
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1281
1304
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1282
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1305
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1306
+ ([] if self._drop_input_cols else inputs) + outputs)
1283
1307
  # For regressor, the type of predict is float64
1284
1308
  elif self._sklearn_object._estimator_type == 'regressor':
1285
1309
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1286
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1287
-
1310
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1311
+ ([] if self._drop_input_cols else inputs) + outputs)
1288
1312
  for prob_func in PROB_FUNCTIONS:
1289
1313
  if hasattr(self, prob_func):
1290
1314
  output_cols_prefix: str = f"{prob_func}_"
1291
1315
  output_column_names = self._get_output_column_names(output_cols_prefix)
1292
1316
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1293
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1317
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1318
+ ([] if self._drop_input_cols else inputs) + outputs)
1294
1319
 
1295
1320
  @property
1296
1321
  def model_signatures(self) -> Dict[str, ModelSignature]: