snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -26,6 +27,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
26
27
  from snowflake.snowpark import DataFrame, Session
27
28
  from snowflake.snowpark.functions import pandas_udf, sproc
28
29
  from snowflake.snowpark.types import PandasSeries
30
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
29
31
 
30
32
  from snowflake.ml.model.model_signature import (
31
33
  DataType,
@@ -200,7 +202,6 @@ class LGBMRegressor(BaseTransformer):
200
202
  **kwargs,
201
203
  ) -> None:
202
204
  super().__init__()
203
- self.id = str(uuid4()).replace("-", "_").upper()
204
205
  deps: Set[str] = set([f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}'])
205
206
 
206
207
  self._deps = list(deps)
@@ -240,6 +241,15 @@ class LGBMRegressor(BaseTransformer):
240
241
  self.set_drop_input_cols(drop_input_cols)
241
242
  self.set_sample_weight_col(sample_weight_col)
242
243
 
244
+ def _get_rand_id(self) -> str:
245
+ """
246
+ Generate random id to be used in sproc and stage names.
247
+
248
+ Returns:
249
+ Random id string usable in sproc, table, and stage names.
250
+ """
251
+ return str(uuid4()).replace("-", "_").upper()
252
+
243
253
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
244
254
  """
245
255
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -318,7 +328,7 @@ class LGBMRegressor(BaseTransformer):
318
328
  cp.dump(self._sklearn_object, local_transform_file)
319
329
 
320
330
  # Create temp stage to run fit.
321
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
331
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
322
332
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
323
333
  SqlResultValidator(
324
334
  session=session,
@@ -331,11 +341,12 @@ class LGBMRegressor(BaseTransformer):
331
341
  expected_value=f"Stage area {transform_stage_name} successfully created."
332
342
  ).validate()
333
343
 
334
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
344
+ # Use posixpath to construct stage paths
345
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
346
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
335
347
  local_result_file_name = get_temp_file_path()
336
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
337
348
 
338
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
349
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
339
350
  statement_params = telemetry.get_function_usage_statement_params(
340
351
  project=_PROJECT,
341
352
  subproject=_SUBPROJECT,
@@ -361,6 +372,7 @@ class LGBMRegressor(BaseTransformer):
361
372
  replace=True,
362
373
  session=session,
363
374
  statement_params=statement_params,
375
+ anonymous=True
364
376
  )
365
377
  def fit_wrapper_sproc(
366
378
  session: Session,
@@ -369,7 +381,8 @@ class LGBMRegressor(BaseTransformer):
369
381
  stage_result_file_name: str,
370
382
  input_cols: List[str],
371
383
  label_cols: List[str],
372
- sample_weight_col: Optional[str]
384
+ sample_weight_col: Optional[str],
385
+ statement_params: Dict[str, str]
373
386
  ) -> str:
374
387
  import cloudpickle as cp
375
388
  import numpy as np
@@ -436,15 +449,15 @@ class LGBMRegressor(BaseTransformer):
436
449
  api_calls=[Session.call],
437
450
  custom_tags=dict([("autogen", True)]),
438
451
  )
439
- sproc_export_file_name = session.call(
440
- fit_sproc_name,
452
+ sproc_export_file_name = fit_wrapper_sproc(
453
+ session,
441
454
  query,
442
455
  stage_transform_file_name,
443
456
  stage_result_file_name,
444
457
  identifier.get_unescaped_names(self.input_cols),
445
458
  identifier.get_unescaped_names(self.label_cols),
446
459
  identifier.get_unescaped_names(self.sample_weight_col),
447
- statement_params=statement_params,
460
+ statement_params,
448
461
  )
449
462
 
450
463
  if "|" in sproc_export_file_name:
@@ -454,7 +467,7 @@ class LGBMRegressor(BaseTransformer):
454
467
  print("\n".join(fields[1:]))
455
468
 
456
469
  session.file.get(
457
- os.path.join(stage_result_file_name, sproc_export_file_name),
470
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
458
471
  local_result_file_name,
459
472
  statement_params=statement_params
460
473
  )
@@ -500,7 +513,7 @@ class LGBMRegressor(BaseTransformer):
500
513
 
501
514
  # Register vectorized UDF for batch inference
502
515
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
503
- safe_id=self.id, method=inference_method)
516
+ safe_id=self._get_rand_id(), method=inference_method)
504
517
 
505
518
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
506
519
  # will try to pickle all of self which fails.
@@ -592,7 +605,7 @@ class LGBMRegressor(BaseTransformer):
592
605
  return transformed_pandas_df.to_dict("records")
593
606
 
594
607
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
595
- safe_id=self.id
608
+ safe_id=self._get_rand_id()
596
609
  )
597
610
 
598
611
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -759,11 +772,18 @@ class LGBMRegressor(BaseTransformer):
759
772
  Transformed dataset.
760
773
  """
761
774
  if isinstance(dataset, DataFrame):
775
+ expected_type_inferred = "float"
776
+ # when it is classifier, infer the datatype from label columns
777
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
778
+ expected_type_inferred = convert_sp_to_sf_type(
779
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
780
+ )
781
+
762
782
  output_df = self._batch_inference(
763
783
  dataset=dataset,
764
784
  inference_method="predict",
765
785
  expected_output_cols_list=self.output_cols,
766
- expected_output_cols_type="float",
786
+ expected_output_cols_type=expected_type_inferred,
767
787
  )
768
788
  elif isinstance(dataset, pd.DataFrame):
769
789
  output_df = self._sklearn_inference(
@@ -834,10 +854,10 @@ class LGBMRegressor(BaseTransformer):
834
854
 
835
855
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
836
856
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
837
- Returns an empty list if current object is not a classifier or not yet fitted.
857
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
838
858
  """
839
859
  if getattr(self._sklearn_object, "classes_", None) is None:
840
- return []
860
+ return [output_cols_prefix]
841
861
 
842
862
  classes = self._sklearn_object.classes_
843
863
  if isinstance(classes, numpy.ndarray):
@@ -1062,7 +1082,7 @@ class LGBMRegressor(BaseTransformer):
1062
1082
  cp.dump(self._sklearn_object, local_score_file)
1063
1083
 
1064
1084
  # Create temp stage to run score.
1065
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1085
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1066
1086
  session = dataset._session
1067
1087
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1068
1088
  SqlResultValidator(
@@ -1076,8 +1096,9 @@ class LGBMRegressor(BaseTransformer):
1076
1096
  expected_value=f"Stage area {score_stage_name} successfully created."
1077
1097
  ).validate()
1078
1098
 
1079
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1080
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1099
+ # Use posixpath to construct stage paths
1100
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1101
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1081
1102
  statement_params = telemetry.get_function_usage_statement_params(
1082
1103
  project=_PROJECT,
1083
1104
  subproject=_SUBPROJECT,
@@ -1103,6 +1124,7 @@ class LGBMRegressor(BaseTransformer):
1103
1124
  replace=True,
1104
1125
  session=session,
1105
1126
  statement_params=statement_params,
1127
+ anonymous=True
1106
1128
  )
1107
1129
  def score_wrapper_sproc(
1108
1130
  session: Session,
@@ -1110,7 +1132,8 @@ class LGBMRegressor(BaseTransformer):
1110
1132
  stage_score_file_name: str,
1111
1133
  input_cols: List[str],
1112
1134
  label_cols: List[str],
1113
- sample_weight_col: Optional[str]
1135
+ sample_weight_col: Optional[str],
1136
+ statement_params: Dict[str, str]
1114
1137
  ) -> float:
1115
1138
  import cloudpickle as cp
1116
1139
  import numpy as np
@@ -1160,14 +1183,14 @@ class LGBMRegressor(BaseTransformer):
1160
1183
  api_calls=[Session.call],
1161
1184
  custom_tags=dict([("autogen", True)]),
1162
1185
  )
1163
- score = session.call(
1164
- score_sproc_name,
1186
+ score = score_wrapper_sproc(
1187
+ session,
1165
1188
  query,
1166
1189
  stage_score_file_name,
1167
1190
  identifier.get_unescaped_names(self.input_cols),
1168
1191
  identifier.get_unescaped_names(self.label_cols),
1169
1192
  identifier.get_unescaped_names(self.sample_weight_col),
1170
- statement_params=statement_params,
1193
+ statement_params,
1171
1194
  )
1172
1195
 
1173
1196
  cleanup_temp_files([local_score_file_name])
@@ -1185,18 +1208,20 @@ class LGBMRegressor(BaseTransformer):
1185
1208
  if self._sklearn_object._estimator_type == 'classifier':
1186
1209
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1187
1210
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1188
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1211
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1212
+ ([] if self._drop_input_cols else inputs) + outputs)
1189
1213
  # For regressor, the type of predict is float64
1190
1214
  elif self._sklearn_object._estimator_type == 'regressor':
1191
1215
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1192
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1193
-
1216
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1217
+ ([] if self._drop_input_cols else inputs) + outputs)
1194
1218
  for prob_func in PROB_FUNCTIONS:
1195
1219
  if hasattr(self, prob_func):
1196
1220
  output_cols_prefix: str = f"{prob_func}_"
1197
1221
  output_column_names = self._get_output_column_names(output_cols_prefix)
1198
1222
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1199
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1223
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1224
+ ([] if self._drop_input_cols else inputs) + outputs)
1200
1225
 
1201
1226
  @property
1202
1227
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -230,7 +232,6 @@ class ARDRegression(BaseTransformer):
230
232
  sample_weight_col: Optional[str] = None,
231
233
  ) -> None:
232
234
  super().__init__()
233
- self.id = str(uuid4()).replace("-", "_").upper()
234
235
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
235
236
 
236
237
  self._deps = list(deps)
@@ -260,6 +261,15 @@ class ARDRegression(BaseTransformer):
260
261
  self.set_drop_input_cols(drop_input_cols)
261
262
  self.set_sample_weight_col(sample_weight_col)
262
263
 
264
+ def _get_rand_id(self) -> str:
265
+ """
266
+ Generate random id to be used in sproc and stage names.
267
+
268
+ Returns:
269
+ Random id string usable in sproc, table, and stage names.
270
+ """
271
+ return str(uuid4()).replace("-", "_").upper()
272
+
263
273
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
264
274
  """
265
275
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -338,7 +348,7 @@ class ARDRegression(BaseTransformer):
338
348
  cp.dump(self._sklearn_object, local_transform_file)
339
349
 
340
350
  # Create temp stage to run fit.
341
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
351
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
342
352
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
343
353
  SqlResultValidator(
344
354
  session=session,
@@ -351,11 +361,12 @@ class ARDRegression(BaseTransformer):
351
361
  expected_value=f"Stage area {transform_stage_name} successfully created."
352
362
  ).validate()
353
363
 
354
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
364
+ # Use posixpath to construct stage paths
365
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
366
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
355
367
  local_result_file_name = get_temp_file_path()
356
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
357
368
 
358
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
369
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
359
370
  statement_params = telemetry.get_function_usage_statement_params(
360
371
  project=_PROJECT,
361
372
  subproject=_SUBPROJECT,
@@ -381,6 +392,7 @@ class ARDRegression(BaseTransformer):
381
392
  replace=True,
382
393
  session=session,
383
394
  statement_params=statement_params,
395
+ anonymous=True
384
396
  )
385
397
  def fit_wrapper_sproc(
386
398
  session: Session,
@@ -389,7 +401,8 @@ class ARDRegression(BaseTransformer):
389
401
  stage_result_file_name: str,
390
402
  input_cols: List[str],
391
403
  label_cols: List[str],
392
- sample_weight_col: Optional[str]
404
+ sample_weight_col: Optional[str],
405
+ statement_params: Dict[str, str]
393
406
  ) -> str:
394
407
  import cloudpickle as cp
395
408
  import numpy as np
@@ -456,15 +469,15 @@ class ARDRegression(BaseTransformer):
456
469
  api_calls=[Session.call],
457
470
  custom_tags=dict([("autogen", True)]),
458
471
  )
459
- sproc_export_file_name = session.call(
460
- fit_sproc_name,
472
+ sproc_export_file_name = fit_wrapper_sproc(
473
+ session,
461
474
  query,
462
475
  stage_transform_file_name,
463
476
  stage_result_file_name,
464
477
  identifier.get_unescaped_names(self.input_cols),
465
478
  identifier.get_unescaped_names(self.label_cols),
466
479
  identifier.get_unescaped_names(self.sample_weight_col),
467
- statement_params=statement_params,
480
+ statement_params,
468
481
  )
469
482
 
470
483
  if "|" in sproc_export_file_name:
@@ -474,7 +487,7 @@ class ARDRegression(BaseTransformer):
474
487
  print("\n".join(fields[1:]))
475
488
 
476
489
  session.file.get(
477
- os.path.join(stage_result_file_name, sproc_export_file_name),
490
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
478
491
  local_result_file_name,
479
492
  statement_params=statement_params
480
493
  )
@@ -520,7 +533,7 @@ class ARDRegression(BaseTransformer):
520
533
 
521
534
  # Register vectorized UDF for batch inference
522
535
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
523
- safe_id=self.id, method=inference_method)
536
+ safe_id=self._get_rand_id(), method=inference_method)
524
537
 
525
538
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
526
539
  # will try to pickle all of self which fails.
@@ -612,7 +625,7 @@ class ARDRegression(BaseTransformer):
612
625
  return transformed_pandas_df.to_dict("records")
613
626
 
614
627
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
615
- safe_id=self.id
628
+ safe_id=self._get_rand_id()
616
629
  )
617
630
 
618
631
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -779,11 +792,18 @@ class ARDRegression(BaseTransformer):
779
792
  Transformed dataset.
780
793
  """
781
794
  if isinstance(dataset, DataFrame):
795
+ expected_type_inferred = "float"
796
+ # when it is classifier, infer the datatype from label columns
797
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
798
+ expected_type_inferred = convert_sp_to_sf_type(
799
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
800
+ )
801
+
782
802
  output_df = self._batch_inference(
783
803
  dataset=dataset,
784
804
  inference_method="predict",
785
805
  expected_output_cols_list=self.output_cols,
786
- expected_output_cols_type="float",
806
+ expected_output_cols_type=expected_type_inferred,
787
807
  )
788
808
  elif isinstance(dataset, pd.DataFrame):
789
809
  output_df = self._sklearn_inference(
@@ -854,10 +874,10 @@ class ARDRegression(BaseTransformer):
854
874
 
855
875
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
856
876
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
857
- Returns an empty list if current object is not a classifier or not yet fitted.
877
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
858
878
  """
859
879
  if getattr(self._sklearn_object, "classes_", None) is None:
860
- return []
880
+ return [output_cols_prefix]
861
881
 
862
882
  classes = self._sklearn_object.classes_
863
883
  if isinstance(classes, numpy.ndarray):
@@ -1082,7 +1102,7 @@ class ARDRegression(BaseTransformer):
1082
1102
  cp.dump(self._sklearn_object, local_score_file)
1083
1103
 
1084
1104
  # Create temp stage to run score.
1085
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1105
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1086
1106
  session = dataset._session
1087
1107
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1088
1108
  SqlResultValidator(
@@ -1096,8 +1116,9 @@ class ARDRegression(BaseTransformer):
1096
1116
  expected_value=f"Stage area {score_stage_name} successfully created."
1097
1117
  ).validate()
1098
1118
 
1099
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1100
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1119
+ # Use posixpath to construct stage paths
1120
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1121
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1101
1122
  statement_params = telemetry.get_function_usage_statement_params(
1102
1123
  project=_PROJECT,
1103
1124
  subproject=_SUBPROJECT,
@@ -1123,6 +1144,7 @@ class ARDRegression(BaseTransformer):
1123
1144
  replace=True,
1124
1145
  session=session,
1125
1146
  statement_params=statement_params,
1147
+ anonymous=True
1126
1148
  )
1127
1149
  def score_wrapper_sproc(
1128
1150
  session: Session,
@@ -1130,7 +1152,8 @@ class ARDRegression(BaseTransformer):
1130
1152
  stage_score_file_name: str,
1131
1153
  input_cols: List[str],
1132
1154
  label_cols: List[str],
1133
- sample_weight_col: Optional[str]
1155
+ sample_weight_col: Optional[str],
1156
+ statement_params: Dict[str, str]
1134
1157
  ) -> float:
1135
1158
  import cloudpickle as cp
1136
1159
  import numpy as np
@@ -1180,14 +1203,14 @@ class ARDRegression(BaseTransformer):
1180
1203
  api_calls=[Session.call],
1181
1204
  custom_tags=dict([("autogen", True)]),
1182
1205
  )
1183
- score = session.call(
1184
- score_sproc_name,
1206
+ score = score_wrapper_sproc(
1207
+ session,
1185
1208
  query,
1186
1209
  stage_score_file_name,
1187
1210
  identifier.get_unescaped_names(self.input_cols),
1188
1211
  identifier.get_unescaped_names(self.label_cols),
1189
1212
  identifier.get_unescaped_names(self.sample_weight_col),
1190
- statement_params=statement_params,
1213
+ statement_params,
1191
1214
  )
1192
1215
 
1193
1216
  cleanup_temp_files([local_score_file_name])
@@ -1205,18 +1228,20 @@ class ARDRegression(BaseTransformer):
1205
1228
  if self._sklearn_object._estimator_type == 'classifier':
1206
1229
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1207
1230
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1208
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1231
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1232
+ ([] if self._drop_input_cols else inputs) + outputs)
1209
1233
  # For regressor, the type of predict is float64
1210
1234
  elif self._sklearn_object._estimator_type == 'regressor':
1211
1235
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1212
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1213
-
1236
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1237
+ ([] if self._drop_input_cols else inputs) + outputs)
1214
1238
  for prob_func in PROB_FUNCTIONS:
1215
1239
  if hasattr(self, prob_func):
1216
1240
  output_cols_prefix: str = f"{prob_func}_"
1217
1241
  output_column_names = self._get_output_column_names(output_cols_prefix)
1218
1242
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1219
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1243
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1244
+ ([] if self._drop_input_cols else inputs) + outputs)
1220
1245
 
1221
1246
  @property
1222
1247
  def model_signatures(self) -> Dict[str, ModelSignature]: