snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -224,7 +226,6 @@ class AdaBoostClassifier(BaseTransformer):
224
226
  sample_weight_col: Optional[str] = None,
225
227
  ) -> None:
226
228
  super().__init__()
227
- self.id = str(uuid4()).replace("-", "_").upper()
228
229
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
229
230
  deps = deps | _gather_dependencies(estimator)
230
231
  deps = deps | _gather_dependencies(base_estimator)
@@ -251,6 +252,15 @@ class AdaBoostClassifier(BaseTransformer):
251
252
  self.set_drop_input_cols(drop_input_cols)
252
253
  self.set_sample_weight_col(sample_weight_col)
253
254
 
255
+ def _get_rand_id(self) -> str:
256
+ """
257
+ Generate random id to be used in sproc and stage names.
258
+
259
+ Returns:
260
+ Random id string usable in sproc, table, and stage names.
261
+ """
262
+ return str(uuid4()).replace("-", "_").upper()
263
+
254
264
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
255
265
  """
256
266
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -329,7 +339,7 @@ class AdaBoostClassifier(BaseTransformer):
329
339
  cp.dump(self._sklearn_object, local_transform_file)
330
340
 
331
341
  # Create temp stage to run fit.
332
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
342
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
333
343
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
334
344
  SqlResultValidator(
335
345
  session=session,
@@ -342,11 +352,12 @@ class AdaBoostClassifier(BaseTransformer):
342
352
  expected_value=f"Stage area {transform_stage_name} successfully created."
343
353
  ).validate()
344
354
 
345
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
355
+ # Use posixpath to construct stage paths
356
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
357
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
346
358
  local_result_file_name = get_temp_file_path()
347
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
348
359
 
349
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
360
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
350
361
  statement_params = telemetry.get_function_usage_statement_params(
351
362
  project=_PROJECT,
352
363
  subproject=_SUBPROJECT,
@@ -372,6 +383,7 @@ class AdaBoostClassifier(BaseTransformer):
372
383
  replace=True,
373
384
  session=session,
374
385
  statement_params=statement_params,
386
+ anonymous=True
375
387
  )
376
388
  def fit_wrapper_sproc(
377
389
  session: Session,
@@ -380,7 +392,8 @@ class AdaBoostClassifier(BaseTransformer):
380
392
  stage_result_file_name: str,
381
393
  input_cols: List[str],
382
394
  label_cols: List[str],
383
- sample_weight_col: Optional[str]
395
+ sample_weight_col: Optional[str],
396
+ statement_params: Dict[str, str]
384
397
  ) -> str:
385
398
  import cloudpickle as cp
386
399
  import numpy as np
@@ -447,15 +460,15 @@ class AdaBoostClassifier(BaseTransformer):
447
460
  api_calls=[Session.call],
448
461
  custom_tags=dict([("autogen", True)]),
449
462
  )
450
- sproc_export_file_name = session.call(
451
- fit_sproc_name,
463
+ sproc_export_file_name = fit_wrapper_sproc(
464
+ session,
452
465
  query,
453
466
  stage_transform_file_name,
454
467
  stage_result_file_name,
455
468
  identifier.get_unescaped_names(self.input_cols),
456
469
  identifier.get_unescaped_names(self.label_cols),
457
470
  identifier.get_unescaped_names(self.sample_weight_col),
458
- statement_params=statement_params,
471
+ statement_params,
459
472
  )
460
473
 
461
474
  if "|" in sproc_export_file_name:
@@ -465,7 +478,7 @@ class AdaBoostClassifier(BaseTransformer):
465
478
  print("\n".join(fields[1:]))
466
479
 
467
480
  session.file.get(
468
- os.path.join(stage_result_file_name, sproc_export_file_name),
481
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
469
482
  local_result_file_name,
470
483
  statement_params=statement_params
471
484
  )
@@ -511,7 +524,7 @@ class AdaBoostClassifier(BaseTransformer):
511
524
 
512
525
  # Register vectorized UDF for batch inference
513
526
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
514
- safe_id=self.id, method=inference_method)
527
+ safe_id=self._get_rand_id(), method=inference_method)
515
528
 
516
529
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
517
530
  # will try to pickle all of self which fails.
@@ -603,7 +616,7 @@ class AdaBoostClassifier(BaseTransformer):
603
616
  return transformed_pandas_df.to_dict("records")
604
617
 
605
618
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
606
- safe_id=self.id
619
+ safe_id=self._get_rand_id()
607
620
  )
608
621
 
609
622
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -770,11 +783,18 @@ class AdaBoostClassifier(BaseTransformer):
770
783
  Transformed dataset.
771
784
  """
772
785
  if isinstance(dataset, DataFrame):
786
+ expected_type_inferred = ""
787
+ # when it is classifier, infer the datatype from label columns
788
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
789
+ expected_type_inferred = convert_sp_to_sf_type(
790
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
791
+ )
792
+
773
793
  output_df = self._batch_inference(
774
794
  dataset=dataset,
775
795
  inference_method="predict",
776
796
  expected_output_cols_list=self.output_cols,
777
- expected_output_cols_type="",
797
+ expected_output_cols_type=expected_type_inferred,
778
798
  )
779
799
  elif isinstance(dataset, pd.DataFrame):
780
800
  output_df = self._sklearn_inference(
@@ -845,10 +865,10 @@ class AdaBoostClassifier(BaseTransformer):
845
865
 
846
866
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
847
867
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
848
- Returns an empty list if current object is not a classifier or not yet fitted.
868
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
849
869
  """
850
870
  if getattr(self._sklearn_object, "classes_", None) is None:
851
- return []
871
+ return [output_cols_prefix]
852
872
 
853
873
  classes = self._sklearn_object.classes_
854
874
  if isinstance(classes, numpy.ndarray):
@@ -1079,7 +1099,7 @@ class AdaBoostClassifier(BaseTransformer):
1079
1099
  cp.dump(self._sklearn_object, local_score_file)
1080
1100
 
1081
1101
  # Create temp stage to run score.
1082
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1102
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1083
1103
  session = dataset._session
1084
1104
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1085
1105
  SqlResultValidator(
@@ -1093,8 +1113,9 @@ class AdaBoostClassifier(BaseTransformer):
1093
1113
  expected_value=f"Stage area {score_stage_name} successfully created."
1094
1114
  ).validate()
1095
1115
 
1096
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1097
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1116
+ # Use posixpath to construct stage paths
1117
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1118
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1098
1119
  statement_params = telemetry.get_function_usage_statement_params(
1099
1120
  project=_PROJECT,
1100
1121
  subproject=_SUBPROJECT,
@@ -1120,6 +1141,7 @@ class AdaBoostClassifier(BaseTransformer):
1120
1141
  replace=True,
1121
1142
  session=session,
1122
1143
  statement_params=statement_params,
1144
+ anonymous=True
1123
1145
  )
1124
1146
  def score_wrapper_sproc(
1125
1147
  session: Session,
@@ -1127,7 +1149,8 @@ class AdaBoostClassifier(BaseTransformer):
1127
1149
  stage_score_file_name: str,
1128
1150
  input_cols: List[str],
1129
1151
  label_cols: List[str],
1130
- sample_weight_col: Optional[str]
1152
+ sample_weight_col: Optional[str],
1153
+ statement_params: Dict[str, str]
1131
1154
  ) -> float:
1132
1155
  import cloudpickle as cp
1133
1156
  import numpy as np
@@ -1177,14 +1200,14 @@ class AdaBoostClassifier(BaseTransformer):
1177
1200
  api_calls=[Session.call],
1178
1201
  custom_tags=dict([("autogen", True)]),
1179
1202
  )
1180
- score = session.call(
1181
- score_sproc_name,
1203
+ score = score_wrapper_sproc(
1204
+ session,
1182
1205
  query,
1183
1206
  stage_score_file_name,
1184
1207
  identifier.get_unescaped_names(self.input_cols),
1185
1208
  identifier.get_unescaped_names(self.label_cols),
1186
1209
  identifier.get_unescaped_names(self.sample_weight_col),
1187
- statement_params=statement_params,
1210
+ statement_params,
1188
1211
  )
1189
1212
 
1190
1213
  cleanup_temp_files([local_score_file_name])
@@ -1202,18 +1225,20 @@ class AdaBoostClassifier(BaseTransformer):
1202
1225
  if self._sklearn_object._estimator_type == 'classifier':
1203
1226
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1204
1227
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1205
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1228
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1229
+ ([] if self._drop_input_cols else inputs) + outputs)
1206
1230
  # For regressor, the type of predict is float64
1207
1231
  elif self._sklearn_object._estimator_type == 'regressor':
1208
1232
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1209
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1210
-
1233
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1234
+ ([] if self._drop_input_cols else inputs) + outputs)
1211
1235
  for prob_func in PROB_FUNCTIONS:
1212
1236
  if hasattr(self, prob_func):
1213
1237
  output_cols_prefix: str = f"{prob_func}_"
1214
1238
  output_column_names = self._get_output_column_names(output_cols_prefix)
1215
1239
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1216
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1240
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1241
+ ([] if self._drop_input_cols else inputs) + outputs)
1217
1242
 
1218
1243
  @property
1219
1244
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -221,7 +223,6 @@ class AdaBoostRegressor(BaseTransformer):
221
223
  sample_weight_col: Optional[str] = None,
222
224
  ) -> None:
223
225
  super().__init__()
224
- self.id = str(uuid4()).replace("-", "_").upper()
225
226
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
226
227
  deps = deps | _gather_dependencies(estimator)
227
228
  deps = deps | _gather_dependencies(base_estimator)
@@ -248,6 +249,15 @@ class AdaBoostRegressor(BaseTransformer):
248
249
  self.set_drop_input_cols(drop_input_cols)
249
250
  self.set_sample_weight_col(sample_weight_col)
250
251
 
252
+ def _get_rand_id(self) -> str:
253
+ """
254
+ Generate random id to be used in sproc and stage names.
255
+
256
+ Returns:
257
+ Random id string usable in sproc, table, and stage names.
258
+ """
259
+ return str(uuid4()).replace("-", "_").upper()
260
+
251
261
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
252
262
  """
253
263
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -326,7 +336,7 @@ class AdaBoostRegressor(BaseTransformer):
326
336
  cp.dump(self._sklearn_object, local_transform_file)
327
337
 
328
338
  # Create temp stage to run fit.
329
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
339
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
330
340
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
331
341
  SqlResultValidator(
332
342
  session=session,
@@ -339,11 +349,12 @@ class AdaBoostRegressor(BaseTransformer):
339
349
  expected_value=f"Stage area {transform_stage_name} successfully created."
340
350
  ).validate()
341
351
 
342
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
352
+ # Use posixpath to construct stage paths
353
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
354
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
343
355
  local_result_file_name = get_temp_file_path()
344
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
345
356
 
346
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
357
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
347
358
  statement_params = telemetry.get_function_usage_statement_params(
348
359
  project=_PROJECT,
349
360
  subproject=_SUBPROJECT,
@@ -369,6 +380,7 @@ class AdaBoostRegressor(BaseTransformer):
369
380
  replace=True,
370
381
  session=session,
371
382
  statement_params=statement_params,
383
+ anonymous=True
372
384
  )
373
385
  def fit_wrapper_sproc(
374
386
  session: Session,
@@ -377,7 +389,8 @@ class AdaBoostRegressor(BaseTransformer):
377
389
  stage_result_file_name: str,
378
390
  input_cols: List[str],
379
391
  label_cols: List[str],
380
- sample_weight_col: Optional[str]
392
+ sample_weight_col: Optional[str],
393
+ statement_params: Dict[str, str]
381
394
  ) -> str:
382
395
  import cloudpickle as cp
383
396
  import numpy as np
@@ -444,15 +457,15 @@ class AdaBoostRegressor(BaseTransformer):
444
457
  api_calls=[Session.call],
445
458
  custom_tags=dict([("autogen", True)]),
446
459
  )
447
- sproc_export_file_name = session.call(
448
- fit_sproc_name,
460
+ sproc_export_file_name = fit_wrapper_sproc(
461
+ session,
449
462
  query,
450
463
  stage_transform_file_name,
451
464
  stage_result_file_name,
452
465
  identifier.get_unescaped_names(self.input_cols),
453
466
  identifier.get_unescaped_names(self.label_cols),
454
467
  identifier.get_unescaped_names(self.sample_weight_col),
455
- statement_params=statement_params,
468
+ statement_params,
456
469
  )
457
470
 
458
471
  if "|" in sproc_export_file_name:
@@ -462,7 +475,7 @@ class AdaBoostRegressor(BaseTransformer):
462
475
  print("\n".join(fields[1:]))
463
476
 
464
477
  session.file.get(
465
- os.path.join(stage_result_file_name, sproc_export_file_name),
478
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
466
479
  local_result_file_name,
467
480
  statement_params=statement_params
468
481
  )
@@ -508,7 +521,7 @@ class AdaBoostRegressor(BaseTransformer):
508
521
 
509
522
  # Register vectorized UDF for batch inference
510
523
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
511
- safe_id=self.id, method=inference_method)
524
+ safe_id=self._get_rand_id(), method=inference_method)
512
525
 
513
526
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
514
527
  # will try to pickle all of self which fails.
@@ -600,7 +613,7 @@ class AdaBoostRegressor(BaseTransformer):
600
613
  return transformed_pandas_df.to_dict("records")
601
614
 
602
615
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
603
- safe_id=self.id
616
+ safe_id=self._get_rand_id()
604
617
  )
605
618
 
606
619
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -767,11 +780,18 @@ class AdaBoostRegressor(BaseTransformer):
767
780
  Transformed dataset.
768
781
  """
769
782
  if isinstance(dataset, DataFrame):
783
+ expected_type_inferred = "float"
784
+ # when it is classifier, infer the datatype from label columns
785
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
786
+ expected_type_inferred = convert_sp_to_sf_type(
787
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
788
+ )
789
+
770
790
  output_df = self._batch_inference(
771
791
  dataset=dataset,
772
792
  inference_method="predict",
773
793
  expected_output_cols_list=self.output_cols,
774
- expected_output_cols_type="float",
794
+ expected_output_cols_type=expected_type_inferred,
775
795
  )
776
796
  elif isinstance(dataset, pd.DataFrame):
777
797
  output_df = self._sklearn_inference(
@@ -842,10 +862,10 @@ class AdaBoostRegressor(BaseTransformer):
842
862
 
843
863
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
844
864
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
845
- Returns an empty list if current object is not a classifier or not yet fitted.
865
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
846
866
  """
847
867
  if getattr(self._sklearn_object, "classes_", None) is None:
848
- return []
868
+ return [output_cols_prefix]
849
869
 
850
870
  classes = self._sklearn_object.classes_
851
871
  if isinstance(classes, numpy.ndarray):
@@ -1070,7 +1090,7 @@ class AdaBoostRegressor(BaseTransformer):
1070
1090
  cp.dump(self._sklearn_object, local_score_file)
1071
1091
 
1072
1092
  # Create temp stage to run score.
1073
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1093
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1074
1094
  session = dataset._session
1075
1095
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1076
1096
  SqlResultValidator(
@@ -1084,8 +1104,9 @@ class AdaBoostRegressor(BaseTransformer):
1084
1104
  expected_value=f"Stage area {score_stage_name} successfully created."
1085
1105
  ).validate()
1086
1106
 
1087
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1088
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1107
+ # Use posixpath to construct stage paths
1108
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1109
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1089
1110
  statement_params = telemetry.get_function_usage_statement_params(
1090
1111
  project=_PROJECT,
1091
1112
  subproject=_SUBPROJECT,
@@ -1111,6 +1132,7 @@ class AdaBoostRegressor(BaseTransformer):
1111
1132
  replace=True,
1112
1133
  session=session,
1113
1134
  statement_params=statement_params,
1135
+ anonymous=True
1114
1136
  )
1115
1137
  def score_wrapper_sproc(
1116
1138
  session: Session,
@@ -1118,7 +1140,8 @@ class AdaBoostRegressor(BaseTransformer):
1118
1140
  stage_score_file_name: str,
1119
1141
  input_cols: List[str],
1120
1142
  label_cols: List[str],
1121
- sample_weight_col: Optional[str]
1143
+ sample_weight_col: Optional[str],
1144
+ statement_params: Dict[str, str]
1122
1145
  ) -> float:
1123
1146
  import cloudpickle as cp
1124
1147
  import numpy as np
@@ -1168,14 +1191,14 @@ class AdaBoostRegressor(BaseTransformer):
1168
1191
  api_calls=[Session.call],
1169
1192
  custom_tags=dict([("autogen", True)]),
1170
1193
  )
1171
- score = session.call(
1172
- score_sproc_name,
1194
+ score = score_wrapper_sproc(
1195
+ session,
1173
1196
  query,
1174
1197
  stage_score_file_name,
1175
1198
  identifier.get_unescaped_names(self.input_cols),
1176
1199
  identifier.get_unescaped_names(self.label_cols),
1177
1200
  identifier.get_unescaped_names(self.sample_weight_col),
1178
- statement_params=statement_params,
1201
+ statement_params,
1179
1202
  )
1180
1203
 
1181
1204
  cleanup_temp_files([local_score_file_name])
@@ -1193,18 +1216,20 @@ class AdaBoostRegressor(BaseTransformer):
1193
1216
  if self._sklearn_object._estimator_type == 'classifier':
1194
1217
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1195
1218
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1196
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1219
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1220
+ ([] if self._drop_input_cols else inputs) + outputs)
1197
1221
  # For regressor, the type of predict is float64
1198
1222
  elif self._sklearn_object._estimator_type == 'regressor':
1199
1223
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1200
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1201
-
1224
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1225
+ ([] if self._drop_input_cols else inputs) + outputs)
1202
1226
  for prob_func in PROB_FUNCTIONS:
1203
1227
  if hasattr(self, prob_func):
1204
1228
  output_cols_prefix: str = f"{prob_func}_"
1205
1229
  output_column_names = self._get_output_column_names(output_cols_prefix)
1206
1230
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1207
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1231
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1232
+ ([] if self._drop_input_cols else inputs) + outputs)
1208
1233
 
1209
1234
  @property
1210
1235
  def model_signatures(self) -> Dict[str, ModelSignature]: