snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -250,7 +252,6 @@ class BaggingClassifier(BaseTransformer):
250
252
  sample_weight_col: Optional[str] = None,
251
253
  ) -> None:
252
254
  super().__init__()
253
- self.id = str(uuid4()).replace("-", "_").upper()
254
255
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
255
256
  deps = deps | _gather_dependencies(estimator)
256
257
  deps = deps | _gather_dependencies(base_estimator)
@@ -283,6 +284,15 @@ class BaggingClassifier(BaseTransformer):
283
284
  self.set_drop_input_cols(drop_input_cols)
284
285
  self.set_sample_weight_col(sample_weight_col)
285
286
 
287
+ def _get_rand_id(self) -> str:
288
+ """
289
+ Generate random id to be used in sproc and stage names.
290
+
291
+ Returns:
292
+ Random id string usable in sproc, table, and stage names.
293
+ """
294
+ return str(uuid4()).replace("-", "_").upper()
295
+
286
296
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
287
297
  """
288
298
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -361,7 +371,7 @@ class BaggingClassifier(BaseTransformer):
361
371
  cp.dump(self._sklearn_object, local_transform_file)
362
372
 
363
373
  # Create temp stage to run fit.
364
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
374
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
365
375
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
366
376
  SqlResultValidator(
367
377
  session=session,
@@ -374,11 +384,12 @@ class BaggingClassifier(BaseTransformer):
374
384
  expected_value=f"Stage area {transform_stage_name} successfully created."
375
385
  ).validate()
376
386
 
377
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
387
+ # Use posixpath to construct stage paths
388
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
389
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
378
390
  local_result_file_name = get_temp_file_path()
379
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
380
391
 
381
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
392
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
382
393
  statement_params = telemetry.get_function_usage_statement_params(
383
394
  project=_PROJECT,
384
395
  subproject=_SUBPROJECT,
@@ -404,6 +415,7 @@ class BaggingClassifier(BaseTransformer):
404
415
  replace=True,
405
416
  session=session,
406
417
  statement_params=statement_params,
418
+ anonymous=True
407
419
  )
408
420
  def fit_wrapper_sproc(
409
421
  session: Session,
@@ -412,7 +424,8 @@ class BaggingClassifier(BaseTransformer):
412
424
  stage_result_file_name: str,
413
425
  input_cols: List[str],
414
426
  label_cols: List[str],
415
- sample_weight_col: Optional[str]
427
+ sample_weight_col: Optional[str],
428
+ statement_params: Dict[str, str]
416
429
  ) -> str:
417
430
  import cloudpickle as cp
418
431
  import numpy as np
@@ -479,15 +492,15 @@ class BaggingClassifier(BaseTransformer):
479
492
  api_calls=[Session.call],
480
493
  custom_tags=dict([("autogen", True)]),
481
494
  )
482
- sproc_export_file_name = session.call(
483
- fit_sproc_name,
495
+ sproc_export_file_name = fit_wrapper_sproc(
496
+ session,
484
497
  query,
485
498
  stage_transform_file_name,
486
499
  stage_result_file_name,
487
500
  identifier.get_unescaped_names(self.input_cols),
488
501
  identifier.get_unescaped_names(self.label_cols),
489
502
  identifier.get_unescaped_names(self.sample_weight_col),
490
- statement_params=statement_params,
503
+ statement_params,
491
504
  )
492
505
 
493
506
  if "|" in sproc_export_file_name:
@@ -497,7 +510,7 @@ class BaggingClassifier(BaseTransformer):
497
510
  print("\n".join(fields[1:]))
498
511
 
499
512
  session.file.get(
500
- os.path.join(stage_result_file_name, sproc_export_file_name),
513
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
501
514
  local_result_file_name,
502
515
  statement_params=statement_params
503
516
  )
@@ -543,7 +556,7 @@ class BaggingClassifier(BaseTransformer):
543
556
 
544
557
  # Register vectorized UDF for batch inference
545
558
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
546
- safe_id=self.id, method=inference_method)
559
+ safe_id=self._get_rand_id(), method=inference_method)
547
560
 
548
561
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
549
562
  # will try to pickle all of self which fails.
@@ -635,7 +648,7 @@ class BaggingClassifier(BaseTransformer):
635
648
  return transformed_pandas_df.to_dict("records")
636
649
 
637
650
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
638
- safe_id=self.id
651
+ safe_id=self._get_rand_id()
639
652
  )
640
653
 
641
654
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -802,11 +815,18 @@ class BaggingClassifier(BaseTransformer):
802
815
  Transformed dataset.
803
816
  """
804
817
  if isinstance(dataset, DataFrame):
818
+ expected_type_inferred = ""
819
+ # when it is classifier, infer the datatype from label columns
820
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
821
+ expected_type_inferred = convert_sp_to_sf_type(
822
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
823
+ )
824
+
805
825
  output_df = self._batch_inference(
806
826
  dataset=dataset,
807
827
  inference_method="predict",
808
828
  expected_output_cols_list=self.output_cols,
809
- expected_output_cols_type="",
829
+ expected_output_cols_type=expected_type_inferred,
810
830
  )
811
831
  elif isinstance(dataset, pd.DataFrame):
812
832
  output_df = self._sklearn_inference(
@@ -877,10 +897,10 @@ class BaggingClassifier(BaseTransformer):
877
897
 
878
898
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
879
899
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
880
- Returns an empty list if current object is not a classifier or not yet fitted.
900
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
881
901
  """
882
902
  if getattr(self._sklearn_object, "classes_", None) is None:
883
- return []
903
+ return [output_cols_prefix]
884
904
 
885
905
  classes = self._sklearn_object.classes_
886
906
  if isinstance(classes, numpy.ndarray):
@@ -1111,7 +1131,7 @@ class BaggingClassifier(BaseTransformer):
1111
1131
  cp.dump(self._sklearn_object, local_score_file)
1112
1132
 
1113
1133
  # Create temp stage to run score.
1114
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1134
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1115
1135
  session = dataset._session
1116
1136
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1117
1137
  SqlResultValidator(
@@ -1125,8 +1145,9 @@ class BaggingClassifier(BaseTransformer):
1125
1145
  expected_value=f"Stage area {score_stage_name} successfully created."
1126
1146
  ).validate()
1127
1147
 
1128
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1129
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1148
+ # Use posixpath to construct stage paths
1149
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1150
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1130
1151
  statement_params = telemetry.get_function_usage_statement_params(
1131
1152
  project=_PROJECT,
1132
1153
  subproject=_SUBPROJECT,
@@ -1152,6 +1173,7 @@ class BaggingClassifier(BaseTransformer):
1152
1173
  replace=True,
1153
1174
  session=session,
1154
1175
  statement_params=statement_params,
1176
+ anonymous=True
1155
1177
  )
1156
1178
  def score_wrapper_sproc(
1157
1179
  session: Session,
@@ -1159,7 +1181,8 @@ class BaggingClassifier(BaseTransformer):
1159
1181
  stage_score_file_name: str,
1160
1182
  input_cols: List[str],
1161
1183
  label_cols: List[str],
1162
- sample_weight_col: Optional[str]
1184
+ sample_weight_col: Optional[str],
1185
+ statement_params: Dict[str, str]
1163
1186
  ) -> float:
1164
1187
  import cloudpickle as cp
1165
1188
  import numpy as np
@@ -1209,14 +1232,14 @@ class BaggingClassifier(BaseTransformer):
1209
1232
  api_calls=[Session.call],
1210
1233
  custom_tags=dict([("autogen", True)]),
1211
1234
  )
1212
- score = session.call(
1213
- score_sproc_name,
1235
+ score = score_wrapper_sproc(
1236
+ session,
1214
1237
  query,
1215
1238
  stage_score_file_name,
1216
1239
  identifier.get_unescaped_names(self.input_cols),
1217
1240
  identifier.get_unescaped_names(self.label_cols),
1218
1241
  identifier.get_unescaped_names(self.sample_weight_col),
1219
- statement_params=statement_params,
1242
+ statement_params,
1220
1243
  )
1221
1244
 
1222
1245
  cleanup_temp_files([local_score_file_name])
@@ -1234,18 +1257,20 @@ class BaggingClassifier(BaseTransformer):
1234
1257
  if self._sklearn_object._estimator_type == 'classifier':
1235
1258
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1236
1259
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1237
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1260
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1261
+ ([] if self._drop_input_cols else inputs) + outputs)
1238
1262
  # For regressor, the type of predict is float64
1239
1263
  elif self._sklearn_object._estimator_type == 'regressor':
1240
1264
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1241
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1242
-
1265
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1266
+ ([] if self._drop_input_cols else inputs) + outputs)
1243
1267
  for prob_func in PROB_FUNCTIONS:
1244
1268
  if hasattr(self, prob_func):
1245
1269
  output_cols_prefix: str = f"{prob_func}_"
1246
1270
  output_column_names = self._get_output_column_names(output_cols_prefix)
1247
1271
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1248
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1272
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1273
+ ([] if self._drop_input_cols else inputs) + outputs)
1249
1274
 
1250
1275
  @property
1251
1276
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -250,7 +252,6 @@ class BaggingRegressor(BaseTransformer):
250
252
  sample_weight_col: Optional[str] = None,
251
253
  ) -> None:
252
254
  super().__init__()
253
- self.id = str(uuid4()).replace("-", "_").upper()
254
255
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
255
256
  deps = deps | _gather_dependencies(estimator)
256
257
  deps = deps | _gather_dependencies(base_estimator)
@@ -283,6 +284,15 @@ class BaggingRegressor(BaseTransformer):
283
284
  self.set_drop_input_cols(drop_input_cols)
284
285
  self.set_sample_weight_col(sample_weight_col)
285
286
 
287
+ def _get_rand_id(self) -> str:
288
+ """
289
+ Generate random id to be used in sproc and stage names.
290
+
291
+ Returns:
292
+ Random id string usable in sproc, table, and stage names.
293
+ """
294
+ return str(uuid4()).replace("-", "_").upper()
295
+
286
296
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
287
297
  """
288
298
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -361,7 +371,7 @@ class BaggingRegressor(BaseTransformer):
361
371
  cp.dump(self._sklearn_object, local_transform_file)
362
372
 
363
373
  # Create temp stage to run fit.
364
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
374
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
365
375
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
366
376
  SqlResultValidator(
367
377
  session=session,
@@ -374,11 +384,12 @@ class BaggingRegressor(BaseTransformer):
374
384
  expected_value=f"Stage area {transform_stage_name} successfully created."
375
385
  ).validate()
376
386
 
377
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
387
+ # Use posixpath to construct stage paths
388
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
389
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
378
390
  local_result_file_name = get_temp_file_path()
379
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
380
391
 
381
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
392
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
382
393
  statement_params = telemetry.get_function_usage_statement_params(
383
394
  project=_PROJECT,
384
395
  subproject=_SUBPROJECT,
@@ -404,6 +415,7 @@ class BaggingRegressor(BaseTransformer):
404
415
  replace=True,
405
416
  session=session,
406
417
  statement_params=statement_params,
418
+ anonymous=True
407
419
  )
408
420
  def fit_wrapper_sproc(
409
421
  session: Session,
@@ -412,7 +424,8 @@ class BaggingRegressor(BaseTransformer):
412
424
  stage_result_file_name: str,
413
425
  input_cols: List[str],
414
426
  label_cols: List[str],
415
- sample_weight_col: Optional[str]
427
+ sample_weight_col: Optional[str],
428
+ statement_params: Dict[str, str]
416
429
  ) -> str:
417
430
  import cloudpickle as cp
418
431
  import numpy as np
@@ -479,15 +492,15 @@ class BaggingRegressor(BaseTransformer):
479
492
  api_calls=[Session.call],
480
493
  custom_tags=dict([("autogen", True)]),
481
494
  )
482
- sproc_export_file_name = session.call(
483
- fit_sproc_name,
495
+ sproc_export_file_name = fit_wrapper_sproc(
496
+ session,
484
497
  query,
485
498
  stage_transform_file_name,
486
499
  stage_result_file_name,
487
500
  identifier.get_unescaped_names(self.input_cols),
488
501
  identifier.get_unescaped_names(self.label_cols),
489
502
  identifier.get_unescaped_names(self.sample_weight_col),
490
- statement_params=statement_params,
503
+ statement_params,
491
504
  )
492
505
 
493
506
  if "|" in sproc_export_file_name:
@@ -497,7 +510,7 @@ class BaggingRegressor(BaseTransformer):
497
510
  print("\n".join(fields[1:]))
498
511
 
499
512
  session.file.get(
500
- os.path.join(stage_result_file_name, sproc_export_file_name),
513
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
501
514
  local_result_file_name,
502
515
  statement_params=statement_params
503
516
  )
@@ -543,7 +556,7 @@ class BaggingRegressor(BaseTransformer):
543
556
 
544
557
  # Register vectorized UDF for batch inference
545
558
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
546
- safe_id=self.id, method=inference_method)
559
+ safe_id=self._get_rand_id(), method=inference_method)
547
560
 
548
561
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
549
562
  # will try to pickle all of self which fails.
@@ -635,7 +648,7 @@ class BaggingRegressor(BaseTransformer):
635
648
  return transformed_pandas_df.to_dict("records")
636
649
 
637
650
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
638
- safe_id=self.id
651
+ safe_id=self._get_rand_id()
639
652
  )
640
653
 
641
654
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -802,11 +815,18 @@ class BaggingRegressor(BaseTransformer):
802
815
  Transformed dataset.
803
816
  """
804
817
  if isinstance(dataset, DataFrame):
818
+ expected_type_inferred = "float"
819
+ # when it is classifier, infer the datatype from label columns
820
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
821
+ expected_type_inferred = convert_sp_to_sf_type(
822
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
823
+ )
824
+
805
825
  output_df = self._batch_inference(
806
826
  dataset=dataset,
807
827
  inference_method="predict",
808
828
  expected_output_cols_list=self.output_cols,
809
- expected_output_cols_type="float",
829
+ expected_output_cols_type=expected_type_inferred,
810
830
  )
811
831
  elif isinstance(dataset, pd.DataFrame):
812
832
  output_df = self._sklearn_inference(
@@ -877,10 +897,10 @@ class BaggingRegressor(BaseTransformer):
877
897
 
878
898
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
879
899
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
880
- Returns an empty list if current object is not a classifier or not yet fitted.
900
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
881
901
  """
882
902
  if getattr(self._sklearn_object, "classes_", None) is None:
883
- return []
903
+ return [output_cols_prefix]
884
904
 
885
905
  classes = self._sklearn_object.classes_
886
906
  if isinstance(classes, numpy.ndarray):
@@ -1105,7 +1125,7 @@ class BaggingRegressor(BaseTransformer):
1105
1125
  cp.dump(self._sklearn_object, local_score_file)
1106
1126
 
1107
1127
  # Create temp stage to run score.
1108
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1128
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1109
1129
  session = dataset._session
1110
1130
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1111
1131
  SqlResultValidator(
@@ -1119,8 +1139,9 @@ class BaggingRegressor(BaseTransformer):
1119
1139
  expected_value=f"Stage area {score_stage_name} successfully created."
1120
1140
  ).validate()
1121
1141
 
1122
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1123
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1142
+ # Use posixpath to construct stage paths
1143
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1144
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1124
1145
  statement_params = telemetry.get_function_usage_statement_params(
1125
1146
  project=_PROJECT,
1126
1147
  subproject=_SUBPROJECT,
@@ -1146,6 +1167,7 @@ class BaggingRegressor(BaseTransformer):
1146
1167
  replace=True,
1147
1168
  session=session,
1148
1169
  statement_params=statement_params,
1170
+ anonymous=True
1149
1171
  )
1150
1172
  def score_wrapper_sproc(
1151
1173
  session: Session,
@@ -1153,7 +1175,8 @@ class BaggingRegressor(BaseTransformer):
1153
1175
  stage_score_file_name: str,
1154
1176
  input_cols: List[str],
1155
1177
  label_cols: List[str],
1156
- sample_weight_col: Optional[str]
1178
+ sample_weight_col: Optional[str],
1179
+ statement_params: Dict[str, str]
1157
1180
  ) -> float:
1158
1181
  import cloudpickle as cp
1159
1182
  import numpy as np
@@ -1203,14 +1226,14 @@ class BaggingRegressor(BaseTransformer):
1203
1226
  api_calls=[Session.call],
1204
1227
  custom_tags=dict([("autogen", True)]),
1205
1228
  )
1206
- score = session.call(
1207
- score_sproc_name,
1229
+ score = score_wrapper_sproc(
1230
+ session,
1208
1231
  query,
1209
1232
  stage_score_file_name,
1210
1233
  identifier.get_unescaped_names(self.input_cols),
1211
1234
  identifier.get_unescaped_names(self.label_cols),
1212
1235
  identifier.get_unescaped_names(self.sample_weight_col),
1213
- statement_params=statement_params,
1236
+ statement_params,
1214
1237
  )
1215
1238
 
1216
1239
  cleanup_temp_files([local_score_file_name])
@@ -1228,18 +1251,20 @@ class BaggingRegressor(BaseTransformer):
1228
1251
  if self._sklearn_object._estimator_type == 'classifier':
1229
1252
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1230
1253
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1231
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1254
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1255
+ ([] if self._drop_input_cols else inputs) + outputs)
1232
1256
  # For regressor, the type of predict is float64
1233
1257
  elif self._sklearn_object._estimator_type == 'regressor':
1234
1258
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1235
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1236
-
1259
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1260
+ ([] if self._drop_input_cols else inputs) + outputs)
1237
1261
  for prob_func in PROB_FUNCTIONS:
1238
1262
  if hasattr(self, prob_func):
1239
1263
  output_cols_prefix: str = f"{prob_func}_"
1240
1264
  output_column_names = self._get_output_column_names(output_cols_prefix)
1241
1265
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1242
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1266
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1267
+ ([] if self._drop_input_cols else inputs) + outputs)
1243
1268
 
1244
1269
  @property
1245
1270
  def model_signatures(self) -> Dict[str, ModelSignature]: