snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -226,7 +228,6 @@ class KernelRidge(BaseTransformer):
226
228
  sample_weight_col: Optional[str] = None,
227
229
  ) -> None:
228
230
  super().__init__()
229
- self.id = str(uuid4()).replace("-", "_").upper()
230
231
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
231
232
 
232
233
  self._deps = list(deps)
@@ -251,6 +252,15 @@ class KernelRidge(BaseTransformer):
251
252
  self.set_drop_input_cols(drop_input_cols)
252
253
  self.set_sample_weight_col(sample_weight_col)
253
254
 
255
+ def _get_rand_id(self) -> str:
256
+ """
257
+ Generate random id to be used in sproc and stage names.
258
+
259
+ Returns:
260
+ Random id string usable in sproc, table, and stage names.
261
+ """
262
+ return str(uuid4()).replace("-", "_").upper()
263
+
254
264
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
255
265
  """
256
266
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -329,7 +339,7 @@ class KernelRidge(BaseTransformer):
329
339
  cp.dump(self._sklearn_object, local_transform_file)
330
340
 
331
341
  # Create temp stage to run fit.
332
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
342
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
333
343
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
334
344
  SqlResultValidator(
335
345
  session=session,
@@ -342,11 +352,12 @@ class KernelRidge(BaseTransformer):
342
352
  expected_value=f"Stage area {transform_stage_name} successfully created."
343
353
  ).validate()
344
354
 
345
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
355
+ # Use posixpath to construct stage paths
356
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
357
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
346
358
  local_result_file_name = get_temp_file_path()
347
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
348
359
 
349
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
360
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
350
361
  statement_params = telemetry.get_function_usage_statement_params(
351
362
  project=_PROJECT,
352
363
  subproject=_SUBPROJECT,
@@ -372,6 +383,7 @@ class KernelRidge(BaseTransformer):
372
383
  replace=True,
373
384
  session=session,
374
385
  statement_params=statement_params,
386
+ anonymous=True
375
387
  )
376
388
  def fit_wrapper_sproc(
377
389
  session: Session,
@@ -380,7 +392,8 @@ class KernelRidge(BaseTransformer):
380
392
  stage_result_file_name: str,
381
393
  input_cols: List[str],
382
394
  label_cols: List[str],
383
- sample_weight_col: Optional[str]
395
+ sample_weight_col: Optional[str],
396
+ statement_params: Dict[str, str]
384
397
  ) -> str:
385
398
  import cloudpickle as cp
386
399
  import numpy as np
@@ -447,15 +460,15 @@ class KernelRidge(BaseTransformer):
447
460
  api_calls=[Session.call],
448
461
  custom_tags=dict([("autogen", True)]),
449
462
  )
450
- sproc_export_file_name = session.call(
451
- fit_sproc_name,
463
+ sproc_export_file_name = fit_wrapper_sproc(
464
+ session,
452
465
  query,
453
466
  stage_transform_file_name,
454
467
  stage_result_file_name,
455
468
  identifier.get_unescaped_names(self.input_cols),
456
469
  identifier.get_unescaped_names(self.label_cols),
457
470
  identifier.get_unescaped_names(self.sample_weight_col),
458
- statement_params=statement_params,
471
+ statement_params,
459
472
  )
460
473
 
461
474
  if "|" in sproc_export_file_name:
@@ -465,7 +478,7 @@ class KernelRidge(BaseTransformer):
465
478
  print("\n".join(fields[1:]))
466
479
 
467
480
  session.file.get(
468
- os.path.join(stage_result_file_name, sproc_export_file_name),
481
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
469
482
  local_result_file_name,
470
483
  statement_params=statement_params
471
484
  )
@@ -511,7 +524,7 @@ class KernelRidge(BaseTransformer):
511
524
 
512
525
  # Register vectorized UDF for batch inference
513
526
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
514
- safe_id=self.id, method=inference_method)
527
+ safe_id=self._get_rand_id(), method=inference_method)
515
528
 
516
529
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
517
530
  # will try to pickle all of self which fails.
@@ -603,7 +616,7 @@ class KernelRidge(BaseTransformer):
603
616
  return transformed_pandas_df.to_dict("records")
604
617
 
605
618
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
606
- safe_id=self.id
619
+ safe_id=self._get_rand_id()
607
620
  )
608
621
 
609
622
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -770,11 +783,18 @@ class KernelRidge(BaseTransformer):
770
783
  Transformed dataset.
771
784
  """
772
785
  if isinstance(dataset, DataFrame):
786
+ expected_type_inferred = "float"
787
+ # when it is classifier, infer the datatype from label columns
788
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
789
+ expected_type_inferred = convert_sp_to_sf_type(
790
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
791
+ )
792
+
773
793
  output_df = self._batch_inference(
774
794
  dataset=dataset,
775
795
  inference_method="predict",
776
796
  expected_output_cols_list=self.output_cols,
777
- expected_output_cols_type="float",
797
+ expected_output_cols_type=expected_type_inferred,
778
798
  )
779
799
  elif isinstance(dataset, pd.DataFrame):
780
800
  output_df = self._sklearn_inference(
@@ -845,10 +865,10 @@ class KernelRidge(BaseTransformer):
845
865
 
846
866
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
847
867
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
848
- Returns an empty list if current object is not a classifier or not yet fitted.
868
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
849
869
  """
850
870
  if getattr(self._sklearn_object, "classes_", None) is None:
851
- return []
871
+ return [output_cols_prefix]
852
872
 
853
873
  classes = self._sklearn_object.classes_
854
874
  if isinstance(classes, numpy.ndarray):
@@ -1073,7 +1093,7 @@ class KernelRidge(BaseTransformer):
1073
1093
  cp.dump(self._sklearn_object, local_score_file)
1074
1094
 
1075
1095
  # Create temp stage to run score.
1076
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1096
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1077
1097
  session = dataset._session
1078
1098
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1079
1099
  SqlResultValidator(
@@ -1087,8 +1107,9 @@ class KernelRidge(BaseTransformer):
1087
1107
  expected_value=f"Stage area {score_stage_name} successfully created."
1088
1108
  ).validate()
1089
1109
 
1090
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1091
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1110
+ # Use posixpath to construct stage paths
1111
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1112
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1092
1113
  statement_params = telemetry.get_function_usage_statement_params(
1093
1114
  project=_PROJECT,
1094
1115
  subproject=_SUBPROJECT,
@@ -1114,6 +1135,7 @@ class KernelRidge(BaseTransformer):
1114
1135
  replace=True,
1115
1136
  session=session,
1116
1137
  statement_params=statement_params,
1138
+ anonymous=True
1117
1139
  )
1118
1140
  def score_wrapper_sproc(
1119
1141
  session: Session,
@@ -1121,7 +1143,8 @@ class KernelRidge(BaseTransformer):
1121
1143
  stage_score_file_name: str,
1122
1144
  input_cols: List[str],
1123
1145
  label_cols: List[str],
1124
- sample_weight_col: Optional[str]
1146
+ sample_weight_col: Optional[str],
1147
+ statement_params: Dict[str, str]
1125
1148
  ) -> float:
1126
1149
  import cloudpickle as cp
1127
1150
  import numpy as np
@@ -1171,14 +1194,14 @@ class KernelRidge(BaseTransformer):
1171
1194
  api_calls=[Session.call],
1172
1195
  custom_tags=dict([("autogen", True)]),
1173
1196
  )
1174
- score = session.call(
1175
- score_sproc_name,
1197
+ score = score_wrapper_sproc(
1198
+ session,
1176
1199
  query,
1177
1200
  stage_score_file_name,
1178
1201
  identifier.get_unescaped_names(self.input_cols),
1179
1202
  identifier.get_unescaped_names(self.label_cols),
1180
1203
  identifier.get_unescaped_names(self.sample_weight_col),
1181
- statement_params=statement_params,
1204
+ statement_params,
1182
1205
  )
1183
1206
 
1184
1207
  cleanup_temp_files([local_score_file_name])
@@ -1196,18 +1219,20 @@ class KernelRidge(BaseTransformer):
1196
1219
  if self._sklearn_object._estimator_type == 'classifier':
1197
1220
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1198
1221
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1199
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1222
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1223
+ ([] if self._drop_input_cols else inputs) + outputs)
1200
1224
  # For regressor, the type of predict is float64
1201
1225
  elif self._sklearn_object._estimator_type == 'regressor':
1202
1226
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1203
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1204
-
1227
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1228
+ ([] if self._drop_input_cols else inputs) + outputs)
1205
1229
  for prob_func in PROB_FUNCTIONS:
1206
1230
  if hasattr(self, prob_func):
1207
1231
  output_cols_prefix: str = f"{prob_func}_"
1208
1232
  output_column_names = self._get_output_column_names(output_cols_prefix)
1209
1233
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1210
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1234
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1235
+ ([] if self._drop_input_cols else inputs) + outputs)
1211
1236
 
1212
1237
  @property
1213
1238
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -26,6 +27,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
26
27
  from snowflake.snowpark import DataFrame, Session
27
28
  from snowflake.snowpark.functions import pandas_udf, sproc
28
29
  from snowflake.snowpark.types import PandasSeries
30
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
29
31
 
30
32
  from snowflake.ml.model.model_signature import (
31
33
  DataType,
@@ -200,7 +202,6 @@ class LGBMClassifier(BaseTransformer):
200
202
  **kwargs,
201
203
  ) -> None:
202
204
  super().__init__()
203
- self.id = str(uuid4()).replace("-", "_").upper()
204
205
  deps: Set[str] = set([f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}'])
205
206
 
206
207
  self._deps = list(deps)
@@ -240,6 +241,15 @@ class LGBMClassifier(BaseTransformer):
240
241
  self.set_drop_input_cols(drop_input_cols)
241
242
  self.set_sample_weight_col(sample_weight_col)
242
243
 
244
+ def _get_rand_id(self) -> str:
245
+ """
246
+ Generate random id to be used in sproc and stage names.
247
+
248
+ Returns:
249
+ Random id string usable in sproc, table, and stage names.
250
+ """
251
+ return str(uuid4()).replace("-", "_").upper()
252
+
243
253
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
244
254
  """
245
255
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -318,7 +328,7 @@ class LGBMClassifier(BaseTransformer):
318
328
  cp.dump(self._sklearn_object, local_transform_file)
319
329
 
320
330
  # Create temp stage to run fit.
321
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
331
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
322
332
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
323
333
  SqlResultValidator(
324
334
  session=session,
@@ -331,11 +341,12 @@ class LGBMClassifier(BaseTransformer):
331
341
  expected_value=f"Stage area {transform_stage_name} successfully created."
332
342
  ).validate()
333
343
 
334
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
344
+ # Use posixpath to construct stage paths
345
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
346
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
335
347
  local_result_file_name = get_temp_file_path()
336
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
337
348
 
338
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
349
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
339
350
  statement_params = telemetry.get_function_usage_statement_params(
340
351
  project=_PROJECT,
341
352
  subproject=_SUBPROJECT,
@@ -361,6 +372,7 @@ class LGBMClassifier(BaseTransformer):
361
372
  replace=True,
362
373
  session=session,
363
374
  statement_params=statement_params,
375
+ anonymous=True
364
376
  )
365
377
  def fit_wrapper_sproc(
366
378
  session: Session,
@@ -369,7 +381,8 @@ class LGBMClassifier(BaseTransformer):
369
381
  stage_result_file_name: str,
370
382
  input_cols: List[str],
371
383
  label_cols: List[str],
372
- sample_weight_col: Optional[str]
384
+ sample_weight_col: Optional[str],
385
+ statement_params: Dict[str, str]
373
386
  ) -> str:
374
387
  import cloudpickle as cp
375
388
  import numpy as np
@@ -436,15 +449,15 @@ class LGBMClassifier(BaseTransformer):
436
449
  api_calls=[Session.call],
437
450
  custom_tags=dict([("autogen", True)]),
438
451
  )
439
- sproc_export_file_name = session.call(
440
- fit_sproc_name,
452
+ sproc_export_file_name = fit_wrapper_sproc(
453
+ session,
441
454
  query,
442
455
  stage_transform_file_name,
443
456
  stage_result_file_name,
444
457
  identifier.get_unescaped_names(self.input_cols),
445
458
  identifier.get_unescaped_names(self.label_cols),
446
459
  identifier.get_unescaped_names(self.sample_weight_col),
447
- statement_params=statement_params,
460
+ statement_params,
448
461
  )
449
462
 
450
463
  if "|" in sproc_export_file_name:
@@ -454,7 +467,7 @@ class LGBMClassifier(BaseTransformer):
454
467
  print("\n".join(fields[1:]))
455
468
 
456
469
  session.file.get(
457
- os.path.join(stage_result_file_name, sproc_export_file_name),
470
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
458
471
  local_result_file_name,
459
472
  statement_params=statement_params
460
473
  )
@@ -500,7 +513,7 @@ class LGBMClassifier(BaseTransformer):
500
513
 
501
514
  # Register vectorized UDF for batch inference
502
515
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
503
- safe_id=self.id, method=inference_method)
516
+ safe_id=self._get_rand_id(), method=inference_method)
504
517
 
505
518
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
506
519
  # will try to pickle all of self which fails.
@@ -592,7 +605,7 @@ class LGBMClassifier(BaseTransformer):
592
605
  return transformed_pandas_df.to_dict("records")
593
606
 
594
607
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
595
- safe_id=self.id
608
+ safe_id=self._get_rand_id()
596
609
  )
597
610
 
598
611
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -759,11 +772,18 @@ class LGBMClassifier(BaseTransformer):
759
772
  Transformed dataset.
760
773
  """
761
774
  if isinstance(dataset, DataFrame):
775
+ expected_type_inferred = ""
776
+ # when it is classifier, infer the datatype from label columns
777
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
778
+ expected_type_inferred = convert_sp_to_sf_type(
779
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
780
+ )
781
+
762
782
  output_df = self._batch_inference(
763
783
  dataset=dataset,
764
784
  inference_method="predict",
765
785
  expected_output_cols_list=self.output_cols,
766
- expected_output_cols_type="",
786
+ expected_output_cols_type=expected_type_inferred,
767
787
  )
768
788
  elif isinstance(dataset, pd.DataFrame):
769
789
  output_df = self._sklearn_inference(
@@ -834,10 +854,10 @@ class LGBMClassifier(BaseTransformer):
834
854
 
835
855
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
836
856
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
837
- Returns an empty list if current object is not a classifier or not yet fitted.
857
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
838
858
  """
839
859
  if getattr(self._sklearn_object, "classes_", None) is None:
840
- return []
860
+ return [output_cols_prefix]
841
861
 
842
862
  classes = self._sklearn_object.classes_
843
863
  if isinstance(classes, numpy.ndarray):
@@ -1066,7 +1086,7 @@ class LGBMClassifier(BaseTransformer):
1066
1086
  cp.dump(self._sklearn_object, local_score_file)
1067
1087
 
1068
1088
  # Create temp stage to run score.
1069
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1089
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1070
1090
  session = dataset._session
1071
1091
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1072
1092
  SqlResultValidator(
@@ -1080,8 +1100,9 @@ class LGBMClassifier(BaseTransformer):
1080
1100
  expected_value=f"Stage area {score_stage_name} successfully created."
1081
1101
  ).validate()
1082
1102
 
1083
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1084
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1103
+ # Use posixpath to construct stage paths
1104
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1105
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1085
1106
  statement_params = telemetry.get_function_usage_statement_params(
1086
1107
  project=_PROJECT,
1087
1108
  subproject=_SUBPROJECT,
@@ -1107,6 +1128,7 @@ class LGBMClassifier(BaseTransformer):
1107
1128
  replace=True,
1108
1129
  session=session,
1109
1130
  statement_params=statement_params,
1131
+ anonymous=True
1110
1132
  )
1111
1133
  def score_wrapper_sproc(
1112
1134
  session: Session,
@@ -1114,7 +1136,8 @@ class LGBMClassifier(BaseTransformer):
1114
1136
  stage_score_file_name: str,
1115
1137
  input_cols: List[str],
1116
1138
  label_cols: List[str],
1117
- sample_weight_col: Optional[str]
1139
+ sample_weight_col: Optional[str],
1140
+ statement_params: Dict[str, str]
1118
1141
  ) -> float:
1119
1142
  import cloudpickle as cp
1120
1143
  import numpy as np
@@ -1164,14 +1187,14 @@ class LGBMClassifier(BaseTransformer):
1164
1187
  api_calls=[Session.call],
1165
1188
  custom_tags=dict([("autogen", True)]),
1166
1189
  )
1167
- score = session.call(
1168
- score_sproc_name,
1190
+ score = score_wrapper_sproc(
1191
+ session,
1169
1192
  query,
1170
1193
  stage_score_file_name,
1171
1194
  identifier.get_unescaped_names(self.input_cols),
1172
1195
  identifier.get_unescaped_names(self.label_cols),
1173
1196
  identifier.get_unescaped_names(self.sample_weight_col),
1174
- statement_params=statement_params,
1197
+ statement_params,
1175
1198
  )
1176
1199
 
1177
1200
  cleanup_temp_files([local_score_file_name])
@@ -1189,18 +1212,20 @@ class LGBMClassifier(BaseTransformer):
1189
1212
  if self._sklearn_object._estimator_type == 'classifier':
1190
1213
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1191
1214
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1192
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1215
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1216
+ ([] if self._drop_input_cols else inputs) + outputs)
1193
1217
  # For regressor, the type of predict is float64
1194
1218
  elif self._sklearn_object._estimator_type == 'regressor':
1195
1219
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1196
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1197
-
1220
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1221
+ ([] if self._drop_input_cols else inputs) + outputs)
1198
1222
  for prob_func in PROB_FUNCTIONS:
1199
1223
  if hasattr(self, prob_func):
1200
1224
  output_cols_prefix: str = f"{prob_func}_"
1201
1225
  output_column_names = self._get_output_column_names(output_cols_prefix)
1202
1226
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1203
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1227
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1228
+ ([] if self._drop_input_cols else inputs) + outputs)
1204
1229
 
1205
1230
  @property
1206
1231
  def model_signatures(self) -> Dict[str, ModelSignature]: