snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
--- a/snowflake/ml/modeling/naive_bayes/categorical_nb.py
+++ b/snowflake/ml/modeling/naive_bayes/categorical_nb.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -211,7 +213,6 @@ class CategoricalNB(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -235,6 +236,15 @@ class CategoricalNB(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -313,7 +323,7 @@ class CategoricalNB(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -326,11 +336,12 @@ class CategoricalNB(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -356,6 +367,7 @@ class CategoricalNB(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -364,7 +376,8 @@ class CategoricalNB(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -431,15 +444,15 @@ class CategoricalNB(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
 
@@ -449,7 +462,7 @@ class CategoricalNB(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -495,7 +508,7 @@ class CategoricalNB(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -587,7 +600,7 @@ class CategoricalNB(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -754,11 +767,18 @@ class CategoricalNB(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -829,10 +849,10 @@ class CategoricalNB(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1061,7 +1081,7 @@ class CategoricalNB(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1075,8 +1095,9 @@ class CategoricalNB(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1102,6 +1123,7 @@ class CategoricalNB(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1109,7 +1131,8 @@ class CategoricalNB(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1159,14 +1182,14 @@ class CategoricalNB(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1184,18 +1207,20 @@ class CategoricalNB(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
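Why the hunks above swap os.path.join for posixpath.join when building stage paths: Snowflake stage locations always use forward slashes, while os.path.join follows the client OS and emits backslashes on Windows. A minimal standalone sketch of the difference (ntpath stands in for os.path on a Windows client; the stage and file names are made up):

```python
import ntpath     # what os.path resolves to on Windows
import posixpath  # what os.path resolves to on POSIX systems

stage_name = "SNOWML_TRANSFORM_ABC123"  # hypothetical temp stage
file_name = "model.pkl"                 # hypothetical pickled estimator

# On a Windows client, os.path.join inserts a backslash, which is not a
# valid separator inside a Snowflake stage path.
print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl

# posixpath.join always uses "/", so the stage path is portable across OSes.
print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl
```

The same reasoning applies to the session.file.get hunk, whose stage-side argument is now also built with posixpath. The equivalent changes to complement_nb.py follow.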
--- a/snowflake/ml/modeling/naive_bayes/complement_nb.py
+++ b/snowflake/ml/modeling/naive_bayes/complement_nb.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -205,7 +207,6 @@ class ComplementNB(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -229,6 +230,15 @@ class ComplementNB(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -307,7 +317,7 @@ class ComplementNB(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -320,11 +330,12 @@ class ComplementNB(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -350,6 +361,7 @@ class ComplementNB(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -358,7 +370,8 @@ class ComplementNB(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -425,15 +438,15 @@ class ComplementNB(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
 
@@ -443,7 +456,7 @@ class ComplementNB(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
        )
@@ -489,7 +502,7 @@ class ComplementNB(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -581,7 +594,7 @@ class ComplementNB(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -748,11 +761,18 @@ class ComplementNB(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -823,10 +843,10 @@ class ComplementNB(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
        """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1055,7 +1075,7 @@ class ComplementNB(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1069,8 +1089,9 @@ class ComplementNB(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1096,6 +1117,7 @@ class ComplementNB(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1103,7 +1125,8 @@ class ComplementNB(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1153,14 +1176,14 @@ class ComplementNB(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1178,18 +1201,20 @@ class ComplementNB(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
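Both files also replace session.call(&lt;sproc_name&gt;, ...) with a direct call on the handle returned by sproc(..., anonymous=True). An anonymous stored procedure creates no named server-side object, so concurrent runs cannot collide on a name and nothing is left behind to clean up; the handle is invoked like a function, with statement_params passed through as an ordinary argument (hence the added statement_params parameter in the wrapper signatures). A minimal sketch of the pattern, assuming a live Snowpark Session; the connection parameters and the double procedure are placeholders, not snowflake-ml-python code:

```python
from snowflake.snowpark import Session
from snowflake.snowpark.functions import sproc

# Placeholder connection parameters; substitute real account details.
session = Session.builder.configs(
    {"account": "...", "user": "...", "password": "..."}
).create()

# anonymous=True registers the procedure without creating a named object in
# the database, mirroring the `anonymous=True` added to the sproc() calls above.
@sproc(session=session, replace=True, anonymous=True,
       packages=["snowflake-snowpark-python"])
def double(session_: Session, x: int) -> int:
    return x * 2

# The handle is called directly instead of session.call("NAME", ...); the
# generated code in this release passes the session positionally the same way.
result = double(session, 21)  # returns 42
```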