snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/cluster/agglomerative_clustering.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -256,7 +258,6 @@ class AgglomerativeClustering(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -284,6 +285,15 @@ class AgglomerativeClustering(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
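
Note: the constructor previously froze one `self.id` per estimator instance; moving the generation into `_get_rand_id()` gives every fit, batch-inference, and score call its own identifier, so temporary stages, sprocs, and tables created by repeated or concurrent calls cannot collide. A minimal standalone sketch of the effect (names illustrative):

```python
from uuid import uuid4

def _get_rand_id() -> str:
    # Same transformation the diff uses: dashes replaced and uppercased,
    # so the result is safe inside Snowflake identifiers.
    return str(uuid4()).replace("-", "_").upper()

# Two fits on the same estimator now derive distinct temp stage names:
stage_a = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
stage_b = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id())
assert stage_a != stage_b
```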
@@ -362,7 +372,7 @@ class AgglomerativeClustering(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -375,11 +385,12 @@ class AgglomerativeClustering(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
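
Note: stage paths are Snowflake object paths and must use forward slashes regardless of the client OS, whereas `os.path.join` follows the local platform's separator. A quick illustration, using `ntpath` to stand in for what `os.path` resolves to on a Windows client:

```python
import ntpath      # os.path on Windows
import posixpath

stage_name = "SNOWML_TRANSFORM_ABC123"  # illustrative stage name
file_name = "model.pkl"                 # illustrative file name

# os.path.join on Windows yields a backslash path, which is not a valid
# stage location; posixpath.join always yields the forward-slash form.
print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl
print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl
```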
@@ -405,6 +416,7 @@ class AgglomerativeClustering(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -413,7 +425,8 @@ class AgglomerativeClustering(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -480,15 +493,15 @@ class AgglomerativeClustering(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
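Note: with `anonymous=True`, Snowpark's `sproc()` returns a callable `StoredProcedure` handle without creating a named object in the schema, which is why `session.call(fit_sproc_name, ...)` becomes a direct call on `fit_wrapper_sproc`; the new `statement_params` parameter is likewise forwarded positionally through the wrapper's own signature. A minimal sketch of the pattern (hypothetical procedure body, mirroring the calling convention used in this diff):

```python
from snowflake.snowpark import Session
from snowflake.snowpark.functions import sproc

def run_anonymous_sproc(session: Session) -> str:
    # anonymous=True: no named procedure is registered; sproc() hands back
    # a StoredProcedure handle to invoke directly.
    @sproc(replace=True, session=session, anonymous=True,
           packages=["snowflake-snowpark-python"])
    def echo_sproc(session: Session, msg: str) -> str:  # hypothetical body
        return msg

    # Direct invocation replaces session.call("SNOWML_FIT_<id>", ...);
    # the session is passed as the first positional argument, as above.
    return echo_sproc(session, "hello")
```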
@@ -498,7 +511,7 @@ class AgglomerativeClustering(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -544,7 +557,7 @@ class AgglomerativeClustering(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -636,7 +649,7 @@ class AgglomerativeClustering(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -801,11 +814,18 @@ class AgglomerativeClustering(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
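
Note: `convert_sp_to_sf_type` is a Snowpark-internal helper that maps a Snowpark `DataType` to the corresponding Snowflake SQL type name, so the `predict` UDF can declare a concrete output column type instead of the previous empty string. Roughly (the printed names are the expected mapping, not verified output):

```python
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
from snowflake.snowpark.types import DoubleType, LongType, StringType

# Snowpark logical types map to Snowflake SQL type names, e.g.:
print(convert_sp_to_sf_type(LongType()))    # BIGINT (expected)
print(convert_sp_to_sf_type(DoubleType()))  # DOUBLE (expected)
print(convert_sp_to_sf_type(StringType()))  # STRING (expected)
```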
@@ -876,10 +896,10 @@ class AgglomerativeClustering(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
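
Note: estimators without a fitted `classes_` attribute (regressors, clusterers, or unfitted models) now contribute a single prefixed output column rather than an empty list, so signature generation for `predict_proba`-style methods no longer produces zero output columns. A condensed sketch of the two branches (the per-class naming here is simplified, not the exact generated code):

```python
from typing import Any, List

def get_output_column_names(sklearn_object: Any, output_cols_prefix: str) -> List[str]:
    classes = getattr(sklearn_object, "classes_", None)
    if classes is None:
        # Non-classifier or unfitted: one prefixed column instead of [].
        return [output_cols_prefix]
    # Classifier: one column per class label (simplified naming).
    return [f"{output_cols_prefix}{c}" for c in list(classes)]
```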
@@ -1104,7 +1124,7 @@ class AgglomerativeClustering(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1118,8 +1138,9 @@ class AgglomerativeClustering(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1145,6 +1166,7 @@ class AgglomerativeClustering(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1152,7 +1174,8 @@ class AgglomerativeClustering(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1202,14 +1225,14 @@ class AgglomerativeClustering(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1227,18 +1250,20 @@ class AgglomerativeClustering(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
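
Note: when `drop_input_cols` is False the input columns pass through to the output DataFrame, so the recorded signature's outputs now list those inputs ahead of the prediction columns. A small example of the resulting shape (feature names illustrative):

```python
from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="SEPAL_LENGTH")]
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_CLUSTER")]
drop_input_cols = False  # the default

# drop_input_cols=False: signature outputs = pass-through inputs + predictions.
# drop_input_cols=True: only the prediction columns remain.
sig = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
```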
snowflake/ml/modeling/cluster/birch.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -218,7 +220,6 @@ class Birch(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -242,6 +243,15 @@ class Birch(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -320,7 +330,7 @@ class Birch(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -333,11 +343,12 @@ class Birch(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -363,6 +374,7 @@ class Birch(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -371,7 +383,8 @@ class Birch(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -438,15 +451,15 @@ class Birch(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
@@ -456,7 +469,7 @@ class Birch(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -502,7 +515,7 @@ class Birch(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -594,7 +607,7 @@ class Birch(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
        )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -761,11 +774,18 @@ class Birch(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -838,10 +858,10 @@ class Birch(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1066,7 +1086,7 @@ class Birch(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1080,8 +1100,9 @@ class Birch(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1107,6 +1128,7 @@ class Birch(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1114,7 +1136,8 @@ class Birch(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1164,14 +1187,14 @@ class Birch(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1189,18 +1212,20 @@ class Birch(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
        elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: