snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0

snowflake/ml/modeling/compose/column_transformer.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -253,7 +255,6 @@ class ColumnTransformer(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | _gather_dependencies(transformers)
         self._deps = list(deps)
@@ -279,6 +280,15 @@ class ColumnTransformer(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
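
Note: the generated estimator wrappers no longer cache a single self.id at construction time; every stage, sproc, and UDF name is now suffixed with a fresh value from _get_rand_id(). A minimal sketch of the naming pattern (the specific names below are illustrative only):

    from uuid import uuid4

    def _get_rand_id() -> str:
        # UUID4 with "-" replaced by "_" and upper-cased, so the suffix is safe
        # inside unquoted Snowflake identifiers.
        return str(uuid4()).replace("-", "_").upper()

    # Each call yields a new suffix, so repeated fit()/score() calls on the same
    # estimator no longer reuse one identifier the way the old self.id did.
    transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
    fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=_get_rand_id())
    print(transform_stage_name, fit_sproc_name)
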
@@ -357,7 +367,7 @@ class ColumnTransformer(BaseTransformer):
         cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -370,11 +380,12 @@ class ColumnTransformer(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -400,6 +411,7 @@ class ColumnTransformer(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -408,7 +420,8 @@ class ColumnTransformer(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -475,15 +488,15 @@ class ColumnTransformer(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -493,7 +506,7 @@ class ColumnTransformer(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
            statement_params=statement_params
        )
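
Note: stage paths are now assembled with posixpath.join instead of os.path.join. Snowflake stage paths use forward slashes, so os.path.join would produce backslash-separated paths on Windows clients. A small sketch of the difference (stage and file names are hypothetical):

    import os
    import posixpath

    transform_stage_name = "SNOWML_TRANSFORM_ABC123"          # hypothetical temp stage
    local_transform_file_name = "/tmp/snowml_fit/model.pkl"   # hypothetical local temp file

    # posixpath.join always emits "/" as the separator, which is what stage paths
    # expect on every client platform; os.path.join would emit "\" on Windows.
    stage_transform_file_name = posixpath.join(
        transform_stage_name, os.path.basename(local_transform_file_name)
    )
    print(stage_transform_file_name)  # SNOWML_TRANSFORM_ABC123/model.pkl
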
@@ -539,7 +552,7 @@ class ColumnTransformer(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -631,7 +644,7 @@ class ColumnTransformer(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -796,11 +809,18 @@ class ColumnTransformer(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
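
Note: for Snowpark DataFrame input, predict() no longer hard-codes an empty expected output column type; when a 'predict' model signature is available it derives the Snowflake type from the signature's first output via convert_sp_to_sf_type. A hedged sketch of that conversion using Snowpark's private type utility; the exact type names returned may vary by Snowpark version:

    from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
    from snowflake.snowpark.types import LongType, StringType

    # Maps a Snowpark datatype to the corresponding Snowflake SQL type name.
    print(convert_sp_to_sf_type(LongType()))    # e.g. BIGINT for integer label columns
    print(convert_sp_to_sf_type(StringType()))  # e.g. STRING for string label columns
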
@@ -873,10 +893,10 @@ class ColumnTransformer(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
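
Note: _get_output_column_names() now falls back to a single column named after the prefix instead of an empty list when the fitted estimator exposes no classes_ attribute. A hypothetical illustration of the effect (the per-class naming shown for classifiers is an assumption for illustration, not copied from the package):

    from typing import List, Optional, Sequence

    def get_output_column_names(output_cols_prefix: str, classes: Optional[Sequence] = None) -> List[str]:
        if classes is None:
            # 1.0.2 returned [] here, yielding signatures with no output columns
            # for non-classifiers; 1.0.3 returns the prefix itself instead.
            return [output_cols_prefix]
        return [f"{output_cols_prefix}{c}" for c in classes]

    print(get_output_column_names("decision_function_"))      # ['decision_function_']
    print(get_output_column_names("predict_proba_", [0, 1]))  # ['predict_proba_0', 'predict_proba_1']
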
@@ -1101,7 +1121,7 @@ class ColumnTransformer(BaseTransformer):
         cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1115,8 +1135,9 @@ class ColumnTransformer(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1142,6 +1163,7 @@ class ColumnTransformer(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1149,7 +1171,8 @@ class ColumnTransformer(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1199,14 +1222,14 @@ class ColumnTransformer(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1224,18 +1247,20 @@ class ColumnTransformer(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
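
Note: the generated model signatures now prepend the input features to each method's outputs unless drop_input_cols is set, so the signature matches the pass-through columns returned by batch inference. A minimal sketch using the public signature types (column names are illustrative):

    from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

    inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="FEATURE_0"),
              FeatureSpec(dtype=DataType.DOUBLE, name="FEATURE_1")]
    outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_0")]

    drop_input_cols = False  # the default on these estimators

    # Mirrors the change above: with drop_input_cols=False the predict signature
    # lists the pass-through input columns followed by the prediction column(s).
    signature = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
    print(signature.outputs)
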

snowflake/ml/modeling/compose/transformed_target_regressor.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -214,7 +216,6 @@ class TransformedTargetRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -238,6 +239,15 @@ class TransformedTargetRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -316,7 +326,7 @@ class TransformedTargetRegressor(BaseTransformer):
         cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -329,11 +339,12 @@ class TransformedTargetRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -359,6 +370,7 @@ class TransformedTargetRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -367,7 +379,8 @@ class TransformedTargetRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -434,15 +447,15 @@ class TransformedTargetRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -452,7 +465,7 @@ class TransformedTargetRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -498,7 +511,7 @@ class TransformedTargetRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -590,7 +603,7 @@ class TransformedTargetRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
        )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -757,11 +770,18 @@ class TransformedTargetRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -832,10 +852,10 @@ class TransformedTargetRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1060,7 +1080,7 @@ class TransformedTargetRegressor(BaseTransformer):
         cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1074,8 +1094,9 @@ class TransformedTargetRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1101,6 +1122,7 @@ class TransformedTargetRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1108,7 +1130,8 @@ class TransformedTargetRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1158,14 +1181,14 @@ class TransformedTargetRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1183,18 +1206,20 @@ class TransformedTargetRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: