snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -189,7 +191,6 @@ class GaussianNB(BaseTransformer):
189
191
  sample_weight_col: Optional[str] = None,
190
192
  ) -> None:
191
193
  super().__init__()
192
- self.id = str(uuid4()).replace("-", "_").upper()
193
194
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
194
195
 
195
196
  self._deps = list(deps)
@@ -210,6 +211,15 @@ class GaussianNB(BaseTransformer):
210
211
  self.set_drop_input_cols(drop_input_cols)
211
212
  self.set_sample_weight_col(sample_weight_col)
212
213
 
214
+ def _get_rand_id(self) -> str:
215
+ """
216
+ Generate random id to be used in sproc and stage names.
217
+
218
+ Returns:
219
+ Random id string usable in sproc, table, and stage names.
220
+ """
221
+ return str(uuid4()).replace("-", "_").upper()
222
+
213
223
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
214
224
  """
215
225
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -288,7 +298,7 @@ class GaussianNB(BaseTransformer):
288
298
  cp.dump(self._sklearn_object, local_transform_file)
289
299
 
290
300
  # Create temp stage to run fit.
291
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
301
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
292
302
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
293
303
  SqlResultValidator(
294
304
  session=session,
@@ -301,11 +311,12 @@ class GaussianNB(BaseTransformer):
301
311
  expected_value=f"Stage area {transform_stage_name} successfully created."
302
312
  ).validate()
303
313
 
304
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
314
+ # Use posixpath to construct stage paths
315
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
316
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
305
317
  local_result_file_name = get_temp_file_path()
306
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
307
318
 
308
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
319
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
309
320
  statement_params = telemetry.get_function_usage_statement_params(
310
321
  project=_PROJECT,
311
322
  subproject=_SUBPROJECT,
@@ -331,6 +342,7 @@ class GaussianNB(BaseTransformer):
331
342
  replace=True,
332
343
  session=session,
333
344
  statement_params=statement_params,
345
+ anonymous=True
334
346
  )
335
347
  def fit_wrapper_sproc(
336
348
  session: Session,
@@ -339,7 +351,8 @@ class GaussianNB(BaseTransformer):
339
351
  stage_result_file_name: str,
340
352
  input_cols: List[str],
341
353
  label_cols: List[str],
342
- sample_weight_col: Optional[str]
354
+ sample_weight_col: Optional[str],
355
+ statement_params: Dict[str, str]
343
356
  ) -> str:
344
357
  import cloudpickle as cp
345
358
  import numpy as np
@@ -406,15 +419,15 @@ class GaussianNB(BaseTransformer):
406
419
  api_calls=[Session.call],
407
420
  custom_tags=dict([("autogen", True)]),
408
421
  )
409
- sproc_export_file_name = session.call(
410
- fit_sproc_name,
422
+ sproc_export_file_name = fit_wrapper_sproc(
423
+ session,
411
424
  query,
412
425
  stage_transform_file_name,
413
426
  stage_result_file_name,
414
427
  identifier.get_unescaped_names(self.input_cols),
415
428
  identifier.get_unescaped_names(self.label_cols),
416
429
  identifier.get_unescaped_names(self.sample_weight_col),
417
- statement_params=statement_params,
430
+ statement_params,
418
431
  )
419
432
 
420
433
  if "|" in sproc_export_file_name:
@@ -424,7 +437,7 @@ class GaussianNB(BaseTransformer):
424
437
  print("\n".join(fields[1:]))
425
438
 
426
439
  session.file.get(
427
- os.path.join(stage_result_file_name, sproc_export_file_name),
440
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
428
441
  local_result_file_name,
429
442
  statement_params=statement_params
430
443
  )
@@ -470,7 +483,7 @@ class GaussianNB(BaseTransformer):
470
483
 
471
484
  # Register vectorized UDF for batch inference
472
485
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
473
- safe_id=self.id, method=inference_method)
486
+ safe_id=self._get_rand_id(), method=inference_method)
474
487
 
475
488
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
476
489
  # will try to pickle all of self which fails.
@@ -562,7 +575,7 @@ class GaussianNB(BaseTransformer):
562
575
  return transformed_pandas_df.to_dict("records")
563
576
 
564
577
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
565
- safe_id=self.id
578
+ safe_id=self._get_rand_id()
566
579
  )
567
580
 
568
581
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -729,11 +742,18 @@ class GaussianNB(BaseTransformer):
729
742
  Transformed dataset.
730
743
  """
731
744
  if isinstance(dataset, DataFrame):
745
+ expected_type_inferred = ""
746
+ # when it is classifier, infer the datatype from label columns
747
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
748
+ expected_type_inferred = convert_sp_to_sf_type(
749
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
750
+ )
751
+
732
752
  output_df = self._batch_inference(
733
753
  dataset=dataset,
734
754
  inference_method="predict",
735
755
  expected_output_cols_list=self.output_cols,
736
- expected_output_cols_type="",
756
+ expected_output_cols_type=expected_type_inferred,
737
757
  )
738
758
  elif isinstance(dataset, pd.DataFrame):
739
759
  output_df = self._sklearn_inference(
@@ -804,10 +824,10 @@ class GaussianNB(BaseTransformer):
804
824
 
805
825
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
806
826
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
807
- Returns an empty list if current object is not a classifier or not yet fitted.
827
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
808
828
  """
809
829
  if getattr(self._sklearn_object, "classes_", None) is None:
810
- return []
830
+ return [output_cols_prefix]
811
831
 
812
832
  classes = self._sklearn_object.classes_
813
833
  if isinstance(classes, numpy.ndarray):
@@ -1036,7 +1056,7 @@ class GaussianNB(BaseTransformer):
1036
1056
  cp.dump(self._sklearn_object, local_score_file)
1037
1057
 
1038
1058
  # Create temp stage to run score.
1039
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1059
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1040
1060
  session = dataset._session
1041
1061
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1042
1062
  SqlResultValidator(
@@ -1050,8 +1070,9 @@ class GaussianNB(BaseTransformer):
1050
1070
  expected_value=f"Stage area {score_stage_name} successfully created."
1051
1071
  ).validate()
1052
1072
 
1053
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1054
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1073
+ # Use posixpath to construct stage paths
1074
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1075
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1055
1076
  statement_params = telemetry.get_function_usage_statement_params(
1056
1077
  project=_PROJECT,
1057
1078
  subproject=_SUBPROJECT,
@@ -1077,6 +1098,7 @@ class GaussianNB(BaseTransformer):
1077
1098
  replace=True,
1078
1099
  session=session,
1079
1100
  statement_params=statement_params,
1101
+ anonymous=True
1080
1102
  )
1081
1103
  def score_wrapper_sproc(
1082
1104
  session: Session,
@@ -1084,7 +1106,8 @@ class GaussianNB(BaseTransformer):
1084
1106
  stage_score_file_name: str,
1085
1107
  input_cols: List[str],
1086
1108
  label_cols: List[str],
1087
- sample_weight_col: Optional[str]
1109
+ sample_weight_col: Optional[str],
1110
+ statement_params: Dict[str, str]
1088
1111
  ) -> float:
1089
1112
  import cloudpickle as cp
1090
1113
  import numpy as np
@@ -1134,14 +1157,14 @@ class GaussianNB(BaseTransformer):
1134
1157
  api_calls=[Session.call],
1135
1158
  custom_tags=dict([("autogen", True)]),
1136
1159
  )
1137
- score = session.call(
1138
- score_sproc_name,
1160
+ score = score_wrapper_sproc(
1161
+ session,
1139
1162
  query,
1140
1163
  stage_score_file_name,
1141
1164
  identifier.get_unescaped_names(self.input_cols),
1142
1165
  identifier.get_unescaped_names(self.label_cols),
1143
1166
  identifier.get_unescaped_names(self.sample_weight_col),
1144
- statement_params=statement_params,
1167
+ statement_params,
1145
1168
  )
1146
1169
 
1147
1170
  cleanup_temp_files([local_score_file_name])
@@ -1159,18 +1182,20 @@ class GaussianNB(BaseTransformer):
1159
1182
  if self._sklearn_object._estimator_type == 'classifier':
1160
1183
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1161
1184
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1162
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1185
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1186
+ ([] if self._drop_input_cols else inputs) + outputs)
1163
1187
  # For regressor, the type of predict is float64
1164
1188
  elif self._sklearn_object._estimator_type == 'regressor':
1165
1189
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1166
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1167
-
1190
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1191
+ ([] if self._drop_input_cols else inputs) + outputs)
1168
1192
  for prob_func in PROB_FUNCTIONS:
1169
1193
  if hasattr(self, prob_func):
1170
1194
  output_cols_prefix: str = f"{prob_func}_"
1171
1195
  output_column_names = self._get_output_column_names(output_cols_prefix)
1172
1196
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1173
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1197
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1198
+ ([] if self._drop_input_cols else inputs) + outputs)
1174
1199
 
1175
1200
  @property
1176
1201
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -200,7 +202,6 @@ class MultinomialNB(BaseTransformer):
200
202
  sample_weight_col: Optional[str] = None,
201
203
  ) -> None:
202
204
  super().__init__()
203
- self.id = str(uuid4()).replace("-", "_").upper()
204
205
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
205
206
 
206
207
  self._deps = list(deps)
@@ -223,6 +224,15 @@ class MultinomialNB(BaseTransformer):
223
224
  self.set_drop_input_cols(drop_input_cols)
224
225
  self.set_sample_weight_col(sample_weight_col)
225
226
 
227
+ def _get_rand_id(self) -> str:
228
+ """
229
+ Generate random id to be used in sproc and stage names.
230
+
231
+ Returns:
232
+ Random id string usable in sproc, table, and stage names.
233
+ """
234
+ return str(uuid4()).replace("-", "_").upper()
235
+
226
236
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
227
237
  """
228
238
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -301,7 +311,7 @@ class MultinomialNB(BaseTransformer):
301
311
  cp.dump(self._sklearn_object, local_transform_file)
302
312
 
303
313
  # Create temp stage to run fit.
304
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
314
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
305
315
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
306
316
  SqlResultValidator(
307
317
  session=session,
@@ -314,11 +324,12 @@ class MultinomialNB(BaseTransformer):
314
324
  expected_value=f"Stage area {transform_stage_name} successfully created."
315
325
  ).validate()
316
326
 
317
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
327
+ # Use posixpath to construct stage paths
328
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
329
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
318
330
  local_result_file_name = get_temp_file_path()
319
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
320
331
 
321
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
332
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
322
333
  statement_params = telemetry.get_function_usage_statement_params(
323
334
  project=_PROJECT,
324
335
  subproject=_SUBPROJECT,
@@ -344,6 +355,7 @@ class MultinomialNB(BaseTransformer):
344
355
  replace=True,
345
356
  session=session,
346
357
  statement_params=statement_params,
358
+ anonymous=True
347
359
  )
348
360
  def fit_wrapper_sproc(
349
361
  session: Session,
@@ -352,7 +364,8 @@ class MultinomialNB(BaseTransformer):
352
364
  stage_result_file_name: str,
353
365
  input_cols: List[str],
354
366
  label_cols: List[str],
355
- sample_weight_col: Optional[str]
367
+ sample_weight_col: Optional[str],
368
+ statement_params: Dict[str, str]
356
369
  ) -> str:
357
370
  import cloudpickle as cp
358
371
  import numpy as np
@@ -419,15 +432,15 @@ class MultinomialNB(BaseTransformer):
419
432
  api_calls=[Session.call],
420
433
  custom_tags=dict([("autogen", True)]),
421
434
  )
422
- sproc_export_file_name = session.call(
423
- fit_sproc_name,
435
+ sproc_export_file_name = fit_wrapper_sproc(
436
+ session,
424
437
  query,
425
438
  stage_transform_file_name,
426
439
  stage_result_file_name,
427
440
  identifier.get_unescaped_names(self.input_cols),
428
441
  identifier.get_unescaped_names(self.label_cols),
429
442
  identifier.get_unescaped_names(self.sample_weight_col),
430
- statement_params=statement_params,
443
+ statement_params,
431
444
  )
432
445
 
433
446
  if "|" in sproc_export_file_name:
@@ -437,7 +450,7 @@ class MultinomialNB(BaseTransformer):
437
450
  print("\n".join(fields[1:]))
438
451
 
439
452
  session.file.get(
440
- os.path.join(stage_result_file_name, sproc_export_file_name),
453
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
441
454
  local_result_file_name,
442
455
  statement_params=statement_params
443
456
  )
@@ -483,7 +496,7 @@ class MultinomialNB(BaseTransformer):
483
496
 
484
497
  # Register vectorized UDF for batch inference
485
498
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
486
- safe_id=self.id, method=inference_method)
499
+ safe_id=self._get_rand_id(), method=inference_method)
487
500
 
488
501
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
489
502
  # will try to pickle all of self which fails.
@@ -575,7 +588,7 @@ class MultinomialNB(BaseTransformer):
575
588
  return transformed_pandas_df.to_dict("records")
576
589
 
577
590
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
578
- safe_id=self.id
591
+ safe_id=self._get_rand_id()
579
592
  )
580
593
 
581
594
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -742,11 +755,18 @@ class MultinomialNB(BaseTransformer):
742
755
  Transformed dataset.
743
756
  """
744
757
  if isinstance(dataset, DataFrame):
758
+ expected_type_inferred = ""
759
+ # when it is classifier, infer the datatype from label columns
760
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
761
+ expected_type_inferred = convert_sp_to_sf_type(
762
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
763
+ )
764
+
745
765
  output_df = self._batch_inference(
746
766
  dataset=dataset,
747
767
  inference_method="predict",
748
768
  expected_output_cols_list=self.output_cols,
749
- expected_output_cols_type="",
769
+ expected_output_cols_type=expected_type_inferred,
750
770
  )
751
771
  elif isinstance(dataset, pd.DataFrame):
752
772
  output_df = self._sklearn_inference(
@@ -817,10 +837,10 @@ class MultinomialNB(BaseTransformer):
817
837
 
818
838
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
819
839
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
820
- Returns an empty list if current object is not a classifier or not yet fitted.
840
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
821
841
  """
822
842
  if getattr(self._sklearn_object, "classes_", None) is None:
823
- return []
843
+ return [output_cols_prefix]
824
844
 
825
845
  classes = self._sklearn_object.classes_
826
846
  if isinstance(classes, numpy.ndarray):
@@ -1049,7 +1069,7 @@ class MultinomialNB(BaseTransformer):
1049
1069
  cp.dump(self._sklearn_object, local_score_file)
1050
1070
 
1051
1071
  # Create temp stage to run score.
1052
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1072
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1053
1073
  session = dataset._session
1054
1074
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1055
1075
  SqlResultValidator(
@@ -1063,8 +1083,9 @@ class MultinomialNB(BaseTransformer):
1063
1083
  expected_value=f"Stage area {score_stage_name} successfully created."
1064
1084
  ).validate()
1065
1085
 
1066
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1067
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1086
+ # Use posixpath to construct stage paths
1087
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1088
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1068
1089
  statement_params = telemetry.get_function_usage_statement_params(
1069
1090
  project=_PROJECT,
1070
1091
  subproject=_SUBPROJECT,
@@ -1090,6 +1111,7 @@ class MultinomialNB(BaseTransformer):
1090
1111
  replace=True,
1091
1112
  session=session,
1092
1113
  statement_params=statement_params,
1114
+ anonymous=True
1093
1115
  )
1094
1116
  def score_wrapper_sproc(
1095
1117
  session: Session,
@@ -1097,7 +1119,8 @@ class MultinomialNB(BaseTransformer):
1097
1119
  stage_score_file_name: str,
1098
1120
  input_cols: List[str],
1099
1121
  label_cols: List[str],
1100
- sample_weight_col: Optional[str]
1122
+ sample_weight_col: Optional[str],
1123
+ statement_params: Dict[str, str]
1101
1124
  ) -> float:
1102
1125
  import cloudpickle as cp
1103
1126
  import numpy as np
@@ -1147,14 +1170,14 @@ class MultinomialNB(BaseTransformer):
1147
1170
  api_calls=[Session.call],
1148
1171
  custom_tags=dict([("autogen", True)]),
1149
1172
  )
1150
- score = session.call(
1151
- score_sproc_name,
1173
+ score = score_wrapper_sproc(
1174
+ session,
1152
1175
  query,
1153
1176
  stage_score_file_name,
1154
1177
  identifier.get_unescaped_names(self.input_cols),
1155
1178
  identifier.get_unescaped_names(self.label_cols),
1156
1179
  identifier.get_unescaped_names(self.sample_weight_col),
1157
- statement_params=statement_params,
1180
+ statement_params,
1158
1181
  )
1159
1182
 
1160
1183
  cleanup_temp_files([local_score_file_name])
@@ -1172,18 +1195,20 @@ class MultinomialNB(BaseTransformer):
1172
1195
  if self._sklearn_object._estimator_type == 'classifier':
1173
1196
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1174
1197
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1175
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1198
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1199
+ ([] if self._drop_input_cols else inputs) + outputs)
1176
1200
  # For regressor, the type of predict is float64
1177
1201
  elif self._sklearn_object._estimator_type == 'regressor':
1178
1202
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1179
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1180
-
1203
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1204
+ ([] if self._drop_input_cols else inputs) + outputs)
1181
1205
  for prob_func in PROB_FUNCTIONS:
1182
1206
  if hasattr(self, prob_func):
1183
1207
  output_cols_prefix: str = f"{prob_func}_"
1184
1208
  output_column_names = self._get_output_column_names(output_cols_prefix)
1185
1209
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1186
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1210
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1211
+ ([] if self._drop_input_cols else inputs) + outputs)
1187
1212
 
1188
1213
  @property
1189
1214
  def model_signatures(self) -> Dict[str, ModelSignature]: