snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -262,7 +264,6 @@ class BisectingKMeans(BaseTransformer):
262
264
  sample_weight_col: Optional[str] = None,
263
265
  ) -> None:
264
266
  super().__init__()
265
- self.id = str(uuid4()).replace("-", "_").upper()
266
267
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
267
268
 
268
269
  self._deps = list(deps)
@@ -291,6 +292,15 @@ class BisectingKMeans(BaseTransformer):
291
292
  self.set_drop_input_cols(drop_input_cols)
292
293
  self.set_sample_weight_col(sample_weight_col)
293
294
 
295
+ def _get_rand_id(self) -> str:
296
+ """
297
+ Generate random id to be used in sproc and stage names.
298
+
299
+ Returns:
300
+ Random id string usable in sproc, table, and stage names.
301
+ """
302
+ return str(uuid4()).replace("-", "_").upper()
303
+
294
304
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
295
305
  """
296
306
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -369,7 +379,7 @@ class BisectingKMeans(BaseTransformer):
369
379
  cp.dump(self._sklearn_object, local_transform_file)
370
380
 
371
381
  # Create temp stage to run fit.
372
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
382
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
373
383
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
374
384
  SqlResultValidator(
375
385
  session=session,
@@ -382,11 +392,12 @@ class BisectingKMeans(BaseTransformer):
382
392
  expected_value=f"Stage area {transform_stage_name} successfully created."
383
393
  ).validate()
384
394
 
385
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
395
+ # Use posixpath to construct stage paths
396
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
397
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
386
398
  local_result_file_name = get_temp_file_path()
387
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
388
399
 
389
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
400
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
390
401
  statement_params = telemetry.get_function_usage_statement_params(
391
402
  project=_PROJECT,
392
403
  subproject=_SUBPROJECT,
@@ -412,6 +423,7 @@ class BisectingKMeans(BaseTransformer):
412
423
  replace=True,
413
424
  session=session,
414
425
  statement_params=statement_params,
426
+ anonymous=True
415
427
  )
416
428
  def fit_wrapper_sproc(
417
429
  session: Session,
@@ -420,7 +432,8 @@ class BisectingKMeans(BaseTransformer):
420
432
  stage_result_file_name: str,
421
433
  input_cols: List[str],
422
434
  label_cols: List[str],
423
- sample_weight_col: Optional[str]
435
+ sample_weight_col: Optional[str],
436
+ statement_params: Dict[str, str]
424
437
  ) -> str:
425
438
  import cloudpickle as cp
426
439
  import numpy as np
@@ -487,15 +500,15 @@ class BisectingKMeans(BaseTransformer):
487
500
  api_calls=[Session.call],
488
501
  custom_tags=dict([("autogen", True)]),
489
502
  )
490
- sproc_export_file_name = session.call(
491
- fit_sproc_name,
503
+ sproc_export_file_name = fit_wrapper_sproc(
504
+ session,
492
505
  query,
493
506
  stage_transform_file_name,
494
507
  stage_result_file_name,
495
508
  identifier.get_unescaped_names(self.input_cols),
496
509
  identifier.get_unescaped_names(self.label_cols),
497
510
  identifier.get_unescaped_names(self.sample_weight_col),
498
- statement_params=statement_params,
511
+ statement_params,
499
512
  )
500
513
 
501
514
  if "|" in sproc_export_file_name:
@@ -505,7 +518,7 @@ class BisectingKMeans(BaseTransformer):
505
518
  print("\n".join(fields[1:]))
506
519
 
507
520
  session.file.get(
508
- os.path.join(stage_result_file_name, sproc_export_file_name),
521
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
509
522
  local_result_file_name,
510
523
  statement_params=statement_params
511
524
  )
@@ -551,7 +564,7 @@ class BisectingKMeans(BaseTransformer):
551
564
 
552
565
  # Register vectorized UDF for batch inference
553
566
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
554
- safe_id=self.id, method=inference_method)
567
+ safe_id=self._get_rand_id(), method=inference_method)
555
568
 
556
569
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
557
570
  # will try to pickle all of self which fails.
@@ -643,7 +656,7 @@ class BisectingKMeans(BaseTransformer):
643
656
  return transformed_pandas_df.to_dict("records")
644
657
 
645
658
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
646
- safe_id=self.id
659
+ safe_id=self._get_rand_id()
647
660
  )
648
661
 
649
662
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -810,11 +823,18 @@ class BisectingKMeans(BaseTransformer):
810
823
  Transformed dataset.
811
824
  """
812
825
  if isinstance(dataset, DataFrame):
826
+ expected_type_inferred = ""
827
+ # when it is classifier, infer the datatype from label columns
828
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
829
+ expected_type_inferred = convert_sp_to_sf_type(
830
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
831
+ )
832
+
813
833
  output_df = self._batch_inference(
814
834
  dataset=dataset,
815
835
  inference_method="predict",
816
836
  expected_output_cols_list=self.output_cols,
817
- expected_output_cols_type="",
837
+ expected_output_cols_type=expected_type_inferred,
818
838
  )
819
839
  elif isinstance(dataset, pd.DataFrame):
820
840
  output_df = self._sklearn_inference(
@@ -887,10 +907,10 @@ class BisectingKMeans(BaseTransformer):
887
907
 
888
908
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
889
909
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
890
- Returns an empty list if current object is not a classifier or not yet fitted.
910
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
891
911
  """
892
912
  if getattr(self._sklearn_object, "classes_", None) is None:
893
- return []
913
+ return [output_cols_prefix]
894
914
 
895
915
  classes = self._sklearn_object.classes_
896
916
  if isinstance(classes, numpy.ndarray):
@@ -1115,7 +1135,7 @@ class BisectingKMeans(BaseTransformer):
1115
1135
  cp.dump(self._sklearn_object, local_score_file)
1116
1136
 
1117
1137
  # Create temp stage to run score.
1118
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1138
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1119
1139
  session = dataset._session
1120
1140
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1121
1141
  SqlResultValidator(
@@ -1129,8 +1149,9 @@ class BisectingKMeans(BaseTransformer):
1129
1149
  expected_value=f"Stage area {score_stage_name} successfully created."
1130
1150
  ).validate()
1131
1151
 
1132
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1133
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1152
+ # Use posixpath to construct stage paths
1153
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1154
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1134
1155
  statement_params = telemetry.get_function_usage_statement_params(
1135
1156
  project=_PROJECT,
1136
1157
  subproject=_SUBPROJECT,
@@ -1156,6 +1177,7 @@ class BisectingKMeans(BaseTransformer):
1156
1177
  replace=True,
1157
1178
  session=session,
1158
1179
  statement_params=statement_params,
1180
+ anonymous=True
1159
1181
  )
1160
1182
  def score_wrapper_sproc(
1161
1183
  session: Session,
@@ -1163,7 +1185,8 @@ class BisectingKMeans(BaseTransformer):
1163
1185
  stage_score_file_name: str,
1164
1186
  input_cols: List[str],
1165
1187
  label_cols: List[str],
1166
- sample_weight_col: Optional[str]
1188
+ sample_weight_col: Optional[str],
1189
+ statement_params: Dict[str, str]
1167
1190
  ) -> float:
1168
1191
  import cloudpickle as cp
1169
1192
  import numpy as np
@@ -1213,14 +1236,14 @@ class BisectingKMeans(BaseTransformer):
1213
1236
  api_calls=[Session.call],
1214
1237
  custom_tags=dict([("autogen", True)]),
1215
1238
  )
1216
- score = session.call(
1217
- score_sproc_name,
1239
+ score = score_wrapper_sproc(
1240
+ session,
1218
1241
  query,
1219
1242
  stage_score_file_name,
1220
1243
  identifier.get_unescaped_names(self.input_cols),
1221
1244
  identifier.get_unescaped_names(self.label_cols),
1222
1245
  identifier.get_unescaped_names(self.sample_weight_col),
1223
- statement_params=statement_params,
1246
+ statement_params,
1224
1247
  )
1225
1248
 
1226
1249
  cleanup_temp_files([local_score_file_name])
@@ -1238,18 +1261,20 @@ class BisectingKMeans(BaseTransformer):
1238
1261
  if self._sklearn_object._estimator_type == 'classifier':
1239
1262
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1240
1263
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1241
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1264
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1265
+ ([] if self._drop_input_cols else inputs) + outputs)
1242
1266
  # For regressor, the type of predict is float64
1243
1267
  elif self._sklearn_object._estimator_type == 'regressor':
1244
1268
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1245
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1246
-
1269
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1270
+ ([] if self._drop_input_cols else inputs) + outputs)
1247
1271
  for prob_func in PROB_FUNCTIONS:
1248
1272
  if hasattr(self, prob_func):
1249
1273
  output_cols_prefix: str = f"{prob_func}_"
1250
1274
  output_column_names = self._get_output_column_names(output_cols_prefix)
1251
1275
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1252
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1276
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1277
+ ([] if self._drop_input_cols else inputs) + outputs)
1253
1278
 
1254
1279
  @property
1255
1280
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -232,7 +234,6 @@ class DBSCAN(BaseTransformer):
232
234
  sample_weight_col: Optional[str] = None,
233
235
  ) -> None:
234
236
  super().__init__()
235
- self.id = str(uuid4()).replace("-", "_").upper()
236
237
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
237
238
 
238
239
  self._deps = list(deps)
@@ -259,6 +260,15 @@ class DBSCAN(BaseTransformer):
259
260
  self.set_drop_input_cols(drop_input_cols)
260
261
  self.set_sample_weight_col(sample_weight_col)
261
262
 
263
+ def _get_rand_id(self) -> str:
264
+ """
265
+ Generate random id to be used in sproc and stage names.
266
+
267
+ Returns:
268
+ Random id string usable in sproc, table, and stage names.
269
+ """
270
+ return str(uuid4()).replace("-", "_").upper()
271
+
262
272
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
263
273
  """
264
274
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -337,7 +347,7 @@ class DBSCAN(BaseTransformer):
337
347
  cp.dump(self._sklearn_object, local_transform_file)
338
348
 
339
349
  # Create temp stage to run fit.
340
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
350
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
341
351
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
342
352
  SqlResultValidator(
343
353
  session=session,
@@ -350,11 +360,12 @@ class DBSCAN(BaseTransformer):
350
360
  expected_value=f"Stage area {transform_stage_name} successfully created."
351
361
  ).validate()
352
362
 
353
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
363
+ # Use posixpath to construct stage paths
364
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
365
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
354
366
  local_result_file_name = get_temp_file_path()
355
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
356
367
 
357
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
368
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
358
369
  statement_params = telemetry.get_function_usage_statement_params(
359
370
  project=_PROJECT,
360
371
  subproject=_SUBPROJECT,
@@ -380,6 +391,7 @@ class DBSCAN(BaseTransformer):
380
391
  replace=True,
381
392
  session=session,
382
393
  statement_params=statement_params,
394
+ anonymous=True
383
395
  )
384
396
  def fit_wrapper_sproc(
385
397
  session: Session,
@@ -388,7 +400,8 @@ class DBSCAN(BaseTransformer):
388
400
  stage_result_file_name: str,
389
401
  input_cols: List[str],
390
402
  label_cols: List[str],
391
- sample_weight_col: Optional[str]
403
+ sample_weight_col: Optional[str],
404
+ statement_params: Dict[str, str]
392
405
  ) -> str:
393
406
  import cloudpickle as cp
394
407
  import numpy as np
@@ -455,15 +468,15 @@ class DBSCAN(BaseTransformer):
455
468
  api_calls=[Session.call],
456
469
  custom_tags=dict([("autogen", True)]),
457
470
  )
458
- sproc_export_file_name = session.call(
459
- fit_sproc_name,
471
+ sproc_export_file_name = fit_wrapper_sproc(
472
+ session,
460
473
  query,
461
474
  stage_transform_file_name,
462
475
  stage_result_file_name,
463
476
  identifier.get_unescaped_names(self.input_cols),
464
477
  identifier.get_unescaped_names(self.label_cols),
465
478
  identifier.get_unescaped_names(self.sample_weight_col),
466
- statement_params=statement_params,
479
+ statement_params,
467
480
  )
468
481
 
469
482
  if "|" in sproc_export_file_name:
@@ -473,7 +486,7 @@ class DBSCAN(BaseTransformer):
473
486
  print("\n".join(fields[1:]))
474
487
 
475
488
  session.file.get(
476
- os.path.join(stage_result_file_name, sproc_export_file_name),
489
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
477
490
  local_result_file_name,
478
491
  statement_params=statement_params
479
492
  )
@@ -519,7 +532,7 @@ class DBSCAN(BaseTransformer):
519
532
 
520
533
  # Register vectorized UDF for batch inference
521
534
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
522
- safe_id=self.id, method=inference_method)
535
+ safe_id=self._get_rand_id(), method=inference_method)
523
536
 
524
537
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
525
538
  # will try to pickle all of self which fails.
@@ -611,7 +624,7 @@ class DBSCAN(BaseTransformer):
611
624
  return transformed_pandas_df.to_dict("records")
612
625
 
613
626
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
614
- safe_id=self.id
627
+ safe_id=self._get_rand_id()
615
628
  )
616
629
 
617
630
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -776,11 +789,18 @@ class DBSCAN(BaseTransformer):
776
789
  Transformed dataset.
777
790
  """
778
791
  if isinstance(dataset, DataFrame):
792
+ expected_type_inferred = ""
793
+ # when it is classifier, infer the datatype from label columns
794
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
795
+ expected_type_inferred = convert_sp_to_sf_type(
796
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
797
+ )
798
+
779
799
  output_df = self._batch_inference(
780
800
  dataset=dataset,
781
801
  inference_method="predict",
782
802
  expected_output_cols_list=self.output_cols,
783
- expected_output_cols_type="",
803
+ expected_output_cols_type=expected_type_inferred,
784
804
  )
785
805
  elif isinstance(dataset, pd.DataFrame):
786
806
  output_df = self._sklearn_inference(
@@ -851,10 +871,10 @@ class DBSCAN(BaseTransformer):
851
871
 
852
872
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
853
873
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
854
- Returns an empty list if current object is not a classifier or not yet fitted.
874
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
855
875
  """
856
876
  if getattr(self._sklearn_object, "classes_", None) is None:
857
- return []
877
+ return [output_cols_prefix]
858
878
 
859
879
  classes = self._sklearn_object.classes_
860
880
  if isinstance(classes, numpy.ndarray):
@@ -1079,7 +1099,7 @@ class DBSCAN(BaseTransformer):
1079
1099
  cp.dump(self._sklearn_object, local_score_file)
1080
1100
 
1081
1101
  # Create temp stage to run score.
1082
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1102
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1083
1103
  session = dataset._session
1084
1104
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1085
1105
  SqlResultValidator(
@@ -1093,8 +1113,9 @@ class DBSCAN(BaseTransformer):
1093
1113
  expected_value=f"Stage area {score_stage_name} successfully created."
1094
1114
  ).validate()
1095
1115
 
1096
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1097
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1116
+ # Use posixpath to construct stage paths
1117
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1118
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1098
1119
  statement_params = telemetry.get_function_usage_statement_params(
1099
1120
  project=_PROJECT,
1100
1121
  subproject=_SUBPROJECT,
@@ -1120,6 +1141,7 @@ class DBSCAN(BaseTransformer):
1120
1141
  replace=True,
1121
1142
  session=session,
1122
1143
  statement_params=statement_params,
1144
+ anonymous=True
1123
1145
  )
1124
1146
  def score_wrapper_sproc(
1125
1147
  session: Session,
@@ -1127,7 +1149,8 @@ class DBSCAN(BaseTransformer):
1127
1149
  stage_score_file_name: str,
1128
1150
  input_cols: List[str],
1129
1151
  label_cols: List[str],
1130
- sample_weight_col: Optional[str]
1152
+ sample_weight_col: Optional[str],
1153
+ statement_params: Dict[str, str]
1131
1154
  ) -> float:
1132
1155
  import cloudpickle as cp
1133
1156
  import numpy as np
@@ -1177,14 +1200,14 @@ class DBSCAN(BaseTransformer):
1177
1200
  api_calls=[Session.call],
1178
1201
  custom_tags=dict([("autogen", True)]),
1179
1202
  )
1180
- score = session.call(
1181
- score_sproc_name,
1203
+ score = score_wrapper_sproc(
1204
+ session,
1182
1205
  query,
1183
1206
  stage_score_file_name,
1184
1207
  identifier.get_unescaped_names(self.input_cols),
1185
1208
  identifier.get_unescaped_names(self.label_cols),
1186
1209
  identifier.get_unescaped_names(self.sample_weight_col),
1187
- statement_params=statement_params,
1210
+ statement_params,
1188
1211
  )
1189
1212
 
1190
1213
  cleanup_temp_files([local_score_file_name])
@@ -1202,18 +1225,20 @@ class DBSCAN(BaseTransformer):
1202
1225
  if self._sklearn_object._estimator_type == 'classifier':
1203
1226
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1204
1227
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1205
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1228
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1229
+ ([] if self._drop_input_cols else inputs) + outputs)
1206
1230
  # For regressor, the type of predict is float64
1207
1231
  elif self._sklearn_object._estimator_type == 'regressor':
1208
1232
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1209
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1210
-
1233
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1234
+ ([] if self._drop_input_cols else inputs) + outputs)
1211
1235
  for prob_func in PROB_FUNCTIONS:
1212
1236
  if hasattr(self, prob_func):
1213
1237
  output_cols_prefix: str = f"{prob_func}_"
1214
1238
  output_column_names = self._get_output_column_names(output_cols_prefix)
1215
1239
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1216
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1240
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1241
+ ([] if self._drop_input_cols else inputs) + outputs)
1217
1242
 
1218
1243
  @property
1219
1244
  def model_signatures(self) -> Dict[str, ModelSignature]: