snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/ensemble/random_forest_regressor.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -324,7 +326,6 @@ class RandomForestRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -360,6 +361,15 @@ class RandomForestRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
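Note: `_get_rand_id()` mints a fresh suffix on every call, where the removed `self.id` minted one per estimator instance, so repeated fit/predict runs no longer reuse the same stage, sproc, and table names. A minimal stdlib-only sketch of the helper's behavior:

    import uuid

    def get_rand_id() -> str:
        # Same expression as _get_rand_id(): hyphens become underscores and the
        # result is upper-cased so it can be embedded in Snowflake object names.
        return str(uuid.uuid4()).replace("-", "_").upper()

    print(get_rand_id())  # e.g. '8D2F0A61_3C44_4F2B_9E0A_5B6C7D8E9F10'
    print(get_rand_id())  # a different value on every call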
@@ -438,7 +448,7 @@ class RandomForestRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -451,11 +461,12 @@ class RandomForestRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
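Note: stage paths are POSIX-style regardless of the client OS, so building them with `os.path.join` produces backslash-separated paths on Windows clients; `posixpath.join` is the portable choice. A stdlib-only illustration (`ntpath` emulates Windows `os.path` behavior on any platform):

    import ntpath      # os.path as it behaves on Windows
    import posixpath

    stage, fname = "SNOWML_TRANSFORM_ABC", "model.pkl"
    print(ntpath.join(stage, fname))     # SNOWML_TRANSFORM_ABC\model.pkl -- not a valid stage path
    print(posixpath.join(stage, fname))  # SNOWML_TRANSFORM_ABC/model.pkl -- what stages expect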
@@ -481,6 +492,7 @@ class RandomForestRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -489,7 +501,8 @@ class RandomForestRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -556,15 +569,15 @@ class RandomForestRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -574,7 +587,7 @@ class RandomForestRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -620,7 +633,7 @@ class RandomForestRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -712,7 +725,7 @@ class RandomForestRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -879,11 +892,18 @@ class RandomForestRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -954,10 +974,10 @@ class RandomForestRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
        """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
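Note: the fallback changes from `[]` to `[output_cols_prefix]`, so estimators without a `classes_` attribute (regressors, or unfitted objects) still yield one usable output column instead of an empty signature. A simplified, self-contained restatement (the per-class branch below is a stand-in, not the package's exact naming code):

    from typing import List, Optional

    def get_output_column_names(classes: Optional[list], prefix: str) -> List[str]:
        if classes is None:
            # 1.0.2 returned []; 1.0.3 keeps one column named after the prefix.
            return [prefix]
        return [f"{prefix}{c}" for c in classes]

    print(get_output_column_names(None, "predict_proba_"))    # ['predict_proba_']
    print(get_output_column_names([0, 1], "predict_proba_"))  # ['predict_proba_0', 'predict_proba_1']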
@@ -1182,7 +1202,7 @@ class RandomForestRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1196,8 +1216,9 @@ class RandomForestRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1223,6 +1244,7 @@ class RandomForestRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1230,7 +1252,8 @@ class RandomForestRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1280,14 +1303,14 @@ class RandomForestRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1305,18 +1328,20 @@ class RandomForestRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
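Note: every `ModelSignature` now lists the input features ahead of the outputs unless `drop_input_cols` is set, recording that inference results carry the input columns through. The key expression is plain list concatenation; a self-contained illustration:

    from typing import List

    def signature_outputs(inputs: List[str], outputs: List[str], drop_input_cols: bool) -> List[str]:
        # Mirrors ([] if self._drop_input_cols else inputs) + outputs from the diff.
        return ([] if drop_input_cols else inputs) + outputs

    print(signature_outputs(["F1", "F2"], ["OUTPUT"], drop_input_cols=False))  # ['F1', 'F2', 'OUTPUT']
    print(signature_outputs(["F1", "F2"], ["OUTPUT"], drop_input_cols=True))   # ['OUTPUT']

The same set of changes is applied, with identical hunks at shifted line numbers, to the other autogenerated modeling wrappers; the StackingRegressor diff below is representative.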
snowflake/ml/modeling/ensemble/stacking_regressor.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -235,7 +237,6 @@ class StackingRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | _gather_dependencies(estimators)
         deps = deps | _gather_dependencies(final_estimator)
@@ -262,6 +263,15 @@ class StackingRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -340,7 +350,7 @@ class StackingRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -353,11 +363,12 @@ class StackingRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -383,6 +394,7 @@ class StackingRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -391,7 +403,8 @@ class StackingRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -458,15 +471,15 @@ class StackingRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -476,7 +489,7 @@ class StackingRegressor(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -522,7 +535,7 @@ class StackingRegressor(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -614,7 +627,7 @@ class StackingRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -781,11 +794,18 @@ class StackingRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -858,10 +878,10 @@ class StackingRegressor(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
        """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1086,7 +1106,7 @@ class StackingRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1100,8 +1120,9 @@ class StackingRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1127,6 +1148,7 @@ class StackingRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
        )
         def score_wrapper_sproc(
             session: Session,
@@ -1134,7 +1156,8 @@ class StackingRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1184,14 +1207,14 @@ class StackingRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1209,18 +1232,20 @@ class StackingRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: