snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/ensemble/extra_trees_classifier.py

@@ -7,6 +7,7 @@
  #
  import inspect
  import os
+ import posixpath
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
  from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
  from snowflake.snowpark import DataFrame, Session
  from snowflake.snowpark.functions import pandas_udf, sproc
  from snowflake.snowpark.types import PandasSeries
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

  from snowflake.ml.model.model_signature import (
  DataType,
@@ -348,7 +350,6 @@ class ExtraTreesClassifier(BaseTransformer):
  sample_weight_col: Optional[str] = None,
  ) -> None:
  super().__init__()
- self.id = str(uuid4()).replace("-", "_").upper()
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

  self._deps = list(deps)
@@ -385,6 +386,15 @@ class ExtraTreesClassifier(BaseTransformer):
  self.set_drop_input_cols(drop_input_cols)
  self.set_sample_weight_col(sample_weight_col)

+ def _get_rand_id(self) -> str:
+ """
+ Generate random id to be used in sproc and stage names.
+
+ Returns:
+ Random id string usable in sproc, table, and stage names.
+ """
+ return str(uuid4()).replace("-", "_").upper()
+
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
  """
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -463,7 +473,7 @@ class ExtraTreesClassifier(BaseTransformer):
  cp.dump(self._sklearn_object, local_transform_file)

  # Create temp stage to run fit.
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
  SqlResultValidator(
  session=session,
@@ -476,11 +486,12 @@ class ExtraTreesClassifier(BaseTransformer):
  expected_value=f"Stage area {transform_stage_name} successfully created."
  ).validate()

- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ # Use posixpath to construct stage paths
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
  local_result_file_name = get_temp_file_path()
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -506,6 +517,7 @@ class ExtraTreesClassifier(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def fit_wrapper_sproc(
  session: Session,
@@ -514,7 +526,8 @@ class ExtraTreesClassifier(BaseTransformer):
  stage_result_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> str:
  import cloudpickle as cp
  import numpy as np
@@ -581,15 +594,15 @@ class ExtraTreesClassifier(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- sproc_export_file_name = session.call(
- fit_sproc_name,
+ sproc_export_file_name = fit_wrapper_sproc(
+ session,
  query,
  stage_transform_file_name,
  stage_result_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )

  if "|" in sproc_export_file_name:
@@ -599,7 +612,7 @@ class ExtraTreesClassifier(BaseTransformer):
  print("\n".join(fields[1:]))

  session.file.get(
- os.path.join(stage_result_file_name, sproc_export_file_name),
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
  local_result_file_name,
  statement_params=statement_params
  )
@@ -645,7 +658,7 @@ class ExtraTreesClassifier(BaseTransformer):

  # Register vectorized UDF for batch inference
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
- safe_id=self.id, method=inference_method)
+ safe_id=self._get_rand_id(), method=inference_method)

  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
  # will try to pickle all of self which fails.
@@ -737,7 +750,7 @@ class ExtraTreesClassifier(BaseTransformer):
  return transformed_pandas_df.to_dict("records")

  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
- safe_id=self.id
+ safe_id=self._get_rand_id()
  )

  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -904,11 +917,18 @@ class ExtraTreesClassifier(BaseTransformer):
  Transformed dataset.
  """
  if isinstance(dataset, DataFrame):
+ expected_type_inferred = ""
+ # when it is classifier, infer the datatype from label columns
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
+ expected_type_inferred = convert_sp_to_sf_type(
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
+ )
+
  output_df = self._batch_inference(
  dataset=dataset,
  inference_method="predict",
  expected_output_cols_list=self.output_cols,
- expected_output_cols_type="",
+ expected_output_cols_type=expected_type_inferred,
  )
  elif isinstance(dataset, pd.DataFrame):
  output_df = self._sklearn_inference(
@@ -979,10 +999,10 @@ class ExtraTreesClassifier(BaseTransformer):

  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns an empty list if current object is not a classifier or not yet fitted.
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
  """
  if getattr(self._sklearn_object, "classes_", None) is None:
- return []
+ return [output_cols_prefix]

  classes = self._sklearn_object.classes_
  if isinstance(classes, numpy.ndarray):
@@ -1211,7 +1231,7 @@ class ExtraTreesClassifier(BaseTransformer):
  cp.dump(self._sklearn_object, local_score_file)

  # Create temp stage to run score.
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  session = dataset._session
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
  SqlResultValidator(
@@ -1225,8 +1245,9 @@ class ExtraTreesClassifier(BaseTransformer):
  expected_value=f"Stage area {score_stage_name} successfully created."
  ).validate()

- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ # Use posixpath to construct stage paths
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -1252,6 +1273,7 @@ class ExtraTreesClassifier(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def score_wrapper_sproc(
  session: Session,
@@ -1259,7 +1281,8 @@ class ExtraTreesClassifier(BaseTransformer):
  stage_score_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> float:
  import cloudpickle as cp
  import numpy as np
@@ -1309,14 +1332,14 @@ class ExtraTreesClassifier(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- score = session.call(
- score_sproc_name,
+ score = score_wrapper_sproc(
+ session,
  query,
  stage_score_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )

  cleanup_temp_files([local_score_file_name])
@@ -1334,18 +1357,20 @@ class ExtraTreesClassifier(BaseTransformer):
  if self._sklearn_object._estimator_type == 'classifier':
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  # For regressor, the type of predict is float64
  elif self._sklearn_object._estimator_type == 'regressor':
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  for prob_func in PROB_FUNCTIONS:
  if hasattr(self, prob_func):
  output_cols_prefix: str = f"{prob_func}_"
  output_column_names = self._get_output_column_names(output_cols_prefix)
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)

  @property
  def model_signatures(self) -> Dict[str, ModelSignature]:
snowflake/ml/modeling/ensemble/extra_trees_regressor.py

@@ -7,6 +7,7 @@
  #
  import inspect
  import os
+ import posixpath
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
  from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
  from snowflake.snowpark import DataFrame, Session
  from snowflake.snowpark.functions import pandas_udf, sproc
  from snowflake.snowpark.types import PandasSeries
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

  from snowflake.ml.model.model_signature import (
  DataType,
@@ -328,7 +330,6 @@ class ExtraTreesRegressor(BaseTransformer):
  sample_weight_col: Optional[str] = None,
  ) -> None:
  super().__init__()
- self.id = str(uuid4()).replace("-", "_").upper()
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

  self._deps = list(deps)
@@ -364,6 +365,15 @@ class ExtraTreesRegressor(BaseTransformer):
  self.set_drop_input_cols(drop_input_cols)
  self.set_sample_weight_col(sample_weight_col)

+ def _get_rand_id(self) -> str:
+ """
+ Generate random id to be used in sproc and stage names.
+
+ Returns:
+ Random id string usable in sproc, table, and stage names.
+ """
+ return str(uuid4()).replace("-", "_").upper()
+
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
  """
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -442,7 +452,7 @@ class ExtraTreesRegressor(BaseTransformer):
  cp.dump(self._sklearn_object, local_transform_file)

  # Create temp stage to run fit.
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
  SqlResultValidator(
  session=session,
@@ -455,11 +465,12 @@ class ExtraTreesRegressor(BaseTransformer):
  expected_value=f"Stage area {transform_stage_name} successfully created."
  ).validate()

- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ # Use posixpath to construct stage paths
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
  local_result_file_name = get_temp_file_path()
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -485,6 +496,7 @@ class ExtraTreesRegressor(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def fit_wrapper_sproc(
  session: Session,
@@ -493,7 +505,8 @@ class ExtraTreesRegressor(BaseTransformer):
  stage_result_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> str:
  import cloudpickle as cp
  import numpy as np
@@ -560,15 +573,15 @@ class ExtraTreesRegressor(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- sproc_export_file_name = session.call(
- fit_sproc_name,
+ sproc_export_file_name = fit_wrapper_sproc(
+ session,
  query,
  stage_transform_file_name,
  stage_result_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )

  if "|" in sproc_export_file_name:
@@ -578,7 +591,7 @@ class ExtraTreesRegressor(BaseTransformer):
  print("\n".join(fields[1:]))

  session.file.get(
- os.path.join(stage_result_file_name, sproc_export_file_name),
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
  local_result_file_name,
  statement_params=statement_params
  )
@@ -624,7 +637,7 @@ class ExtraTreesRegressor(BaseTransformer):

  # Register vectorized UDF for batch inference
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
- safe_id=self.id, method=inference_method)
+ safe_id=self._get_rand_id(), method=inference_method)

  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
  # will try to pickle all of self which fails.
@@ -716,7 +729,7 @@ class ExtraTreesRegressor(BaseTransformer):
  return transformed_pandas_df.to_dict("records")

  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
- safe_id=self.id
+ safe_id=self._get_rand_id()
  )

  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -883,11 +896,18 @@ class ExtraTreesRegressor(BaseTransformer):
  Transformed dataset.
  """
  if isinstance(dataset, DataFrame):
+ expected_type_inferred = "float"
+ # when it is classifier, infer the datatype from label columns
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
+ expected_type_inferred = convert_sp_to_sf_type(
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
+ )
+
  output_df = self._batch_inference(
  dataset=dataset,
  inference_method="predict",
  expected_output_cols_list=self.output_cols,
- expected_output_cols_type="float",
+ expected_output_cols_type=expected_type_inferred,
  )
  elif isinstance(dataset, pd.DataFrame):
  output_df = self._sklearn_inference(
@@ -958,10 +978,10 @@ class ExtraTreesRegressor(BaseTransformer):

  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns an empty list if current object is not a classifier or not yet fitted.
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
  """
  if getattr(self._sklearn_object, "classes_", None) is None:
- return []
+ return [output_cols_prefix]

  classes = self._sklearn_object.classes_
  if isinstance(classes, numpy.ndarray):
@@ -1186,7 +1206,7 @@ class ExtraTreesRegressor(BaseTransformer):
  cp.dump(self._sklearn_object, local_score_file)

  # Create temp stage to run score.
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  session = dataset._session
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
  SqlResultValidator(
@@ -1200,8 +1220,9 @@ class ExtraTreesRegressor(BaseTransformer):
  expected_value=f"Stage area {score_stage_name} successfully created."
  ).validate()

- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ # Use posixpath to construct stage paths
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -1227,6 +1248,7 @@ class ExtraTreesRegressor(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def score_wrapper_sproc(
  session: Session,
@@ -1234,7 +1256,8 @@ class ExtraTreesRegressor(BaseTransformer):
  stage_score_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> float:
  import cloudpickle as cp
  import numpy as np
@@ -1284,14 +1307,14 @@ class ExtraTreesRegressor(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- score = session.call(
- score_sproc_name,
+ score = score_wrapper_sproc(
+ session,
  query,
  stage_score_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )

  cleanup_temp_files([local_score_file_name])
@@ -1309,18 +1332,20 @@ class ExtraTreesRegressor(BaseTransformer):
  if self._sklearn_object._estimator_type == 'classifier':
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  # For regressor, the type of predict is float64
  elif self._sklearn_object._estimator_type == 'regressor':
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  for prob_func in PROB_FUNCTIONS:
  if hasattr(self, prob_func):
  output_cols_prefix: str = f"{prob_func}_"
  output_column_names = self._get_output_column_names(output_cols_prefix)
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)

  @property
  def model_signatures(self) -> Dict[str, ModelSignature]:
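
For orientation, a minimal standalone sketch (not code shipped in the package; file name and variable values below are made up) of the two recurring patterns in the hunks above: generating a fresh random identifier per call instead of a stored self.id, and building stage paths with posixpath.join so they always use forward slashes regardless of the client OS.

# Hypothetical illustration only; mirrors the pattern in the diff, not the package API.
import os
import posixpath
from uuid import uuid4

def _get_rand_id() -> str:
    # Fresh random id per call, usable in sproc, table, and stage names.
    return str(uuid4()).replace("-", "_").upper()

local_transform_file_name = "/tmp/model_abc.pkl"  # assumed local temp file path
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())

# Stage paths are POSIX-style, so posixpath.join is used rather than os.path.join,
# which would produce backslashes on Windows clients.
stage_transform_file_name = posixpath.join(
    transform_stage_name, os.path.basename(local_transform_file_name)
)
print(stage_transform_file_name)  # e.g. SNOWML_TRANSFORM_<RANDOM_ID>/model_abc.pkl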