snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -199,7 +201,6 @@ class NearestCentroid(BaseTransformer):
199
201
  sample_weight_col: Optional[str] = None,
200
202
  ) -> None:
201
203
  super().__init__()
202
- self.id = str(uuid4()).replace("-", "_").upper()
203
204
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
204
205
 
205
206
  self._deps = list(deps)
@@ -220,6 +221,15 @@ class NearestCentroid(BaseTransformer):
220
221
  self.set_drop_input_cols(drop_input_cols)
221
222
  self.set_sample_weight_col(sample_weight_col)
222
223
 
224
+ def _get_rand_id(self) -> str:
225
+ """
226
+ Generate random id to be used in sproc and stage names.
227
+
228
+ Returns:
229
+ Random id string usable in sproc, table, and stage names.
230
+ """
231
+ return str(uuid4()).replace("-", "_").upper()
232
+
223
233
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
224
234
  """
225
235
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -298,7 +308,7 @@ class NearestCentroid(BaseTransformer):
298
308
  cp.dump(self._sklearn_object, local_transform_file)
299
309
 
300
310
  # Create temp stage to run fit.
301
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
311
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
302
312
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
303
313
  SqlResultValidator(
304
314
  session=session,
@@ -311,11 +321,12 @@ class NearestCentroid(BaseTransformer):
311
321
  expected_value=f"Stage area {transform_stage_name} successfully created."
312
322
  ).validate()
313
323
 
314
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
324
+ # Use posixpath to construct stage paths
325
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
326
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
315
327
  local_result_file_name = get_temp_file_path()
316
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
317
328
 
318
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
329
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
319
330
  statement_params = telemetry.get_function_usage_statement_params(
320
331
  project=_PROJECT,
321
332
  subproject=_SUBPROJECT,
@@ -341,6 +352,7 @@ class NearestCentroid(BaseTransformer):
341
352
  replace=True,
342
353
  session=session,
343
354
  statement_params=statement_params,
355
+ anonymous=True
344
356
  )
345
357
  def fit_wrapper_sproc(
346
358
  session: Session,
@@ -349,7 +361,8 @@ class NearestCentroid(BaseTransformer):
349
361
  stage_result_file_name: str,
350
362
  input_cols: List[str],
351
363
  label_cols: List[str],
352
- sample_weight_col: Optional[str]
364
+ sample_weight_col: Optional[str],
365
+ statement_params: Dict[str, str]
353
366
  ) -> str:
354
367
  import cloudpickle as cp
355
368
  import numpy as np
@@ -416,15 +429,15 @@ class NearestCentroid(BaseTransformer):
416
429
  api_calls=[Session.call],
417
430
  custom_tags=dict([("autogen", True)]),
418
431
  )
419
- sproc_export_file_name = session.call(
420
- fit_sproc_name,
432
+ sproc_export_file_name = fit_wrapper_sproc(
433
+ session,
421
434
  query,
422
435
  stage_transform_file_name,
423
436
  stage_result_file_name,
424
437
  identifier.get_unescaped_names(self.input_cols),
425
438
  identifier.get_unescaped_names(self.label_cols),
426
439
  identifier.get_unescaped_names(self.sample_weight_col),
427
- statement_params=statement_params,
440
+ statement_params,
428
441
  )
429
442
 
430
443
  if "|" in sproc_export_file_name:
@@ -434,7 +447,7 @@ class NearestCentroid(BaseTransformer):
434
447
  print("\n".join(fields[1:]))
435
448
 
436
449
  session.file.get(
437
- os.path.join(stage_result_file_name, sproc_export_file_name),
450
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
438
451
  local_result_file_name,
439
452
  statement_params=statement_params
440
453
  )
@@ -480,7 +493,7 @@ class NearestCentroid(BaseTransformer):
480
493
 
481
494
  # Register vectorized UDF for batch inference
482
495
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
483
- safe_id=self.id, method=inference_method)
496
+ safe_id=self._get_rand_id(), method=inference_method)
484
497
 
485
498
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
486
499
  # will try to pickle all of self which fails.
@@ -572,7 +585,7 @@ class NearestCentroid(BaseTransformer):
572
585
  return transformed_pandas_df.to_dict("records")
573
586
 
574
587
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
575
- safe_id=self.id
588
+ safe_id=self._get_rand_id()
576
589
  )
577
590
 
578
591
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -739,11 +752,18 @@ class NearestCentroid(BaseTransformer):
739
752
  Transformed dataset.
740
753
  """
741
754
  if isinstance(dataset, DataFrame):
755
+ expected_type_inferred = ""
756
+ # when it is classifier, infer the datatype from label columns
757
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
758
+ expected_type_inferred = convert_sp_to_sf_type(
759
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
760
+ )
761
+
742
762
  output_df = self._batch_inference(
743
763
  dataset=dataset,
744
764
  inference_method="predict",
745
765
  expected_output_cols_list=self.output_cols,
746
- expected_output_cols_type="",
766
+ expected_output_cols_type=expected_type_inferred,
747
767
  )
748
768
  elif isinstance(dataset, pd.DataFrame):
749
769
  output_df = self._sklearn_inference(
@@ -814,10 +834,10 @@ class NearestCentroid(BaseTransformer):
814
834
 
815
835
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
816
836
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
817
- Returns an empty list if current object is not a classifier or not yet fitted.
837
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
818
838
  """
819
839
  if getattr(self._sklearn_object, "classes_", None) is None:
820
- return []
840
+ return [output_cols_prefix]
821
841
 
822
842
  classes = self._sklearn_object.classes_
823
843
  if isinstance(classes, numpy.ndarray):
@@ -1042,7 +1062,7 @@ class NearestCentroid(BaseTransformer):
1042
1062
  cp.dump(self._sklearn_object, local_score_file)
1043
1063
 
1044
1064
  # Create temp stage to run score.
1045
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1065
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1046
1066
  session = dataset._session
1047
1067
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1048
1068
  SqlResultValidator(
@@ -1056,8 +1076,9 @@ class NearestCentroid(BaseTransformer):
1056
1076
  expected_value=f"Stage area {score_stage_name} successfully created."
1057
1077
  ).validate()
1058
1078
 
1059
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1060
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1079
+ # Use posixpath to construct stage paths
1080
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1081
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1061
1082
  statement_params = telemetry.get_function_usage_statement_params(
1062
1083
  project=_PROJECT,
1063
1084
  subproject=_SUBPROJECT,
@@ -1083,6 +1104,7 @@ class NearestCentroid(BaseTransformer):
1083
1104
  replace=True,
1084
1105
  session=session,
1085
1106
  statement_params=statement_params,
1107
+ anonymous=True
1086
1108
  )
1087
1109
  def score_wrapper_sproc(
1088
1110
  session: Session,
@@ -1090,7 +1112,8 @@ class NearestCentroid(BaseTransformer):
1090
1112
  stage_score_file_name: str,
1091
1113
  input_cols: List[str],
1092
1114
  label_cols: List[str],
1093
- sample_weight_col: Optional[str]
1115
+ sample_weight_col: Optional[str],
1116
+ statement_params: Dict[str, str]
1094
1117
  ) -> float:
1095
1118
  import cloudpickle as cp
1096
1119
  import numpy as np
@@ -1140,14 +1163,14 @@ class NearestCentroid(BaseTransformer):
1140
1163
  api_calls=[Session.call],
1141
1164
  custom_tags=dict([("autogen", True)]),
1142
1165
  )
1143
- score = session.call(
1144
- score_sproc_name,
1166
+ score = score_wrapper_sproc(
1167
+ session,
1145
1168
  query,
1146
1169
  stage_score_file_name,
1147
1170
  identifier.get_unescaped_names(self.input_cols),
1148
1171
  identifier.get_unescaped_names(self.label_cols),
1149
1172
  identifier.get_unescaped_names(self.sample_weight_col),
1150
- statement_params=statement_params,
1173
+ statement_params,
1151
1174
  )
1152
1175
 
1153
1176
  cleanup_temp_files([local_score_file_name])
@@ -1165,18 +1188,20 @@ class NearestCentroid(BaseTransformer):
1165
1188
  if self._sklearn_object._estimator_type == 'classifier':
1166
1189
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1167
1190
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1168
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1191
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1192
+ ([] if self._drop_input_cols else inputs) + outputs)
1169
1193
  # For regressor, the type of predict is float64
1170
1194
  elif self._sklearn_object._estimator_type == 'regressor':
1171
1195
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1172
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1173
-
1196
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1197
+ ([] if self._drop_input_cols else inputs) + outputs)
1174
1198
  for prob_func in PROB_FUNCTIONS:
1175
1199
  if hasattr(self, prob_func):
1176
1200
  output_cols_prefix: str = f"{prob_func}_"
1177
1201
  output_column_names = self._get_output_column_names(output_cols_prefix)
1178
1202
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1179
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1203
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1204
+ ([] if self._drop_input_cols else inputs) + outputs)
1180
1205
 
1181
1206
  @property
1182
1207
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -245,7 +247,6 @@ class NearestNeighbors(BaseTransformer):
245
247
  sample_weight_col: Optional[str] = None,
246
248
  ) -> None:
247
249
  super().__init__()
248
- self.id = str(uuid4()).replace("-", "_").upper()
249
250
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
250
251
 
251
252
  self._deps = list(deps)
@@ -272,6 +273,15 @@ class NearestNeighbors(BaseTransformer):
272
273
  self.set_drop_input_cols(drop_input_cols)
273
274
  self.set_sample_weight_col(sample_weight_col)
274
275
 
276
+ def _get_rand_id(self) -> str:
277
+ """
278
+ Generate random id to be used in sproc and stage names.
279
+
280
+ Returns:
281
+ Random id string usable in sproc, table, and stage names.
282
+ """
283
+ return str(uuid4()).replace("-", "_").upper()
284
+
275
285
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
276
286
  """
277
287
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -350,7 +360,7 @@ class NearestNeighbors(BaseTransformer):
350
360
  cp.dump(self._sklearn_object, local_transform_file)
351
361
 
352
362
  # Create temp stage to run fit.
353
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
363
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
354
364
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
355
365
  SqlResultValidator(
356
366
  session=session,
@@ -363,11 +373,12 @@ class NearestNeighbors(BaseTransformer):
363
373
  expected_value=f"Stage area {transform_stage_name} successfully created."
364
374
  ).validate()
365
375
 
366
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
376
+ # Use posixpath to construct stage paths
377
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
378
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
367
379
  local_result_file_name = get_temp_file_path()
368
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
369
380
 
370
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
381
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
371
382
  statement_params = telemetry.get_function_usage_statement_params(
372
383
  project=_PROJECT,
373
384
  subproject=_SUBPROJECT,
@@ -393,6 +404,7 @@ class NearestNeighbors(BaseTransformer):
393
404
  replace=True,
394
405
  session=session,
395
406
  statement_params=statement_params,
407
+ anonymous=True
396
408
  )
397
409
  def fit_wrapper_sproc(
398
410
  session: Session,
@@ -401,7 +413,8 @@ class NearestNeighbors(BaseTransformer):
401
413
  stage_result_file_name: str,
402
414
  input_cols: List[str],
403
415
  label_cols: List[str],
404
- sample_weight_col: Optional[str]
416
+ sample_weight_col: Optional[str],
417
+ statement_params: Dict[str, str]
405
418
  ) -> str:
406
419
  import cloudpickle as cp
407
420
  import numpy as np
@@ -468,15 +481,15 @@ class NearestNeighbors(BaseTransformer):
468
481
  api_calls=[Session.call],
469
482
  custom_tags=dict([("autogen", True)]),
470
483
  )
471
- sproc_export_file_name = session.call(
472
- fit_sproc_name,
484
+ sproc_export_file_name = fit_wrapper_sproc(
485
+ session,
473
486
  query,
474
487
  stage_transform_file_name,
475
488
  stage_result_file_name,
476
489
  identifier.get_unescaped_names(self.input_cols),
477
490
  identifier.get_unescaped_names(self.label_cols),
478
491
  identifier.get_unescaped_names(self.sample_weight_col),
479
- statement_params=statement_params,
492
+ statement_params,
480
493
  )
481
494
 
482
495
  if "|" in sproc_export_file_name:
@@ -486,7 +499,7 @@ class NearestNeighbors(BaseTransformer):
486
499
  print("\n".join(fields[1:]))
487
500
 
488
501
  session.file.get(
489
- os.path.join(stage_result_file_name, sproc_export_file_name),
502
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
490
503
  local_result_file_name,
491
504
  statement_params=statement_params
492
505
  )
@@ -532,7 +545,7 @@ class NearestNeighbors(BaseTransformer):
532
545
 
533
546
  # Register vectorized UDF for batch inference
534
547
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
535
- safe_id=self.id, method=inference_method)
548
+ safe_id=self._get_rand_id(), method=inference_method)
536
549
 
537
550
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
538
551
  # will try to pickle all of self which fails.
@@ -624,7 +637,7 @@ class NearestNeighbors(BaseTransformer):
624
637
  return transformed_pandas_df.to_dict("records")
625
638
 
626
639
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
627
- safe_id=self.id
640
+ safe_id=self._get_rand_id()
628
641
  )
629
642
 
630
643
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -789,11 +802,18 @@ class NearestNeighbors(BaseTransformer):
789
802
  Transformed dataset.
790
803
  """
791
804
  if isinstance(dataset, DataFrame):
805
+ expected_type_inferred = ""
806
+ # when it is classifier, infer the datatype from label columns
807
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
808
+ expected_type_inferred = convert_sp_to_sf_type(
809
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
810
+ )
811
+
792
812
  output_df = self._batch_inference(
793
813
  dataset=dataset,
794
814
  inference_method="predict",
795
815
  expected_output_cols_list=self.output_cols,
796
- expected_output_cols_type="",
816
+ expected_output_cols_type=expected_type_inferred,
797
817
  )
798
818
  elif isinstance(dataset, pd.DataFrame):
799
819
  output_df = self._sklearn_inference(
@@ -864,10 +884,10 @@ class NearestNeighbors(BaseTransformer):
864
884
 
865
885
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
866
886
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
867
- Returns an empty list if current object is not a classifier or not yet fitted.
887
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
868
888
  """
869
889
  if getattr(self._sklearn_object, "classes_", None) is None:
870
- return []
890
+ return [output_cols_prefix]
871
891
 
872
892
  classes = self._sklearn_object.classes_
873
893
  if isinstance(classes, numpy.ndarray):
@@ -1092,7 +1112,7 @@ class NearestNeighbors(BaseTransformer):
1092
1112
  cp.dump(self._sklearn_object, local_score_file)
1093
1113
 
1094
1114
  # Create temp stage to run score.
1095
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1115
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1096
1116
  session = dataset._session
1097
1117
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1098
1118
  SqlResultValidator(
@@ -1106,8 +1126,9 @@ class NearestNeighbors(BaseTransformer):
1106
1126
  expected_value=f"Stage area {score_stage_name} successfully created."
1107
1127
  ).validate()
1108
1128
 
1109
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1110
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1129
+ # Use posixpath to construct stage paths
1130
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1131
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1111
1132
  statement_params = telemetry.get_function_usage_statement_params(
1112
1133
  project=_PROJECT,
1113
1134
  subproject=_SUBPROJECT,
@@ -1133,6 +1154,7 @@ class NearestNeighbors(BaseTransformer):
1133
1154
  replace=True,
1134
1155
  session=session,
1135
1156
  statement_params=statement_params,
1157
+ anonymous=True
1136
1158
  )
1137
1159
  def score_wrapper_sproc(
1138
1160
  session: Session,
@@ -1140,7 +1162,8 @@ class NearestNeighbors(BaseTransformer):
1140
1162
  stage_score_file_name: str,
1141
1163
  input_cols: List[str],
1142
1164
  label_cols: List[str],
1143
- sample_weight_col: Optional[str]
1165
+ sample_weight_col: Optional[str],
1166
+ statement_params: Dict[str, str]
1144
1167
  ) -> float:
1145
1168
  import cloudpickle as cp
1146
1169
  import numpy as np
@@ -1190,14 +1213,14 @@ class NearestNeighbors(BaseTransformer):
1190
1213
  api_calls=[Session.call],
1191
1214
  custom_tags=dict([("autogen", True)]),
1192
1215
  )
1193
- score = session.call(
1194
- score_sproc_name,
1216
+ score = score_wrapper_sproc(
1217
+ session,
1195
1218
  query,
1196
1219
  stage_score_file_name,
1197
1220
  identifier.get_unescaped_names(self.input_cols),
1198
1221
  identifier.get_unescaped_names(self.label_cols),
1199
1222
  identifier.get_unescaped_names(self.sample_weight_col),
1200
- statement_params=statement_params,
1223
+ statement_params,
1201
1224
  )
1202
1225
 
1203
1226
  cleanup_temp_files([local_score_file_name])
@@ -1215,18 +1238,20 @@ class NearestNeighbors(BaseTransformer):
1215
1238
  if self._sklearn_object._estimator_type == 'classifier':
1216
1239
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1217
1240
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1218
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1241
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1242
+ ([] if self._drop_input_cols else inputs) + outputs)
1219
1243
  # For regressor, the type of predict is float64
1220
1244
  elif self._sklearn_object._estimator_type == 'regressor':
1221
1245
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1222
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1223
-
1246
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1247
+ ([] if self._drop_input_cols else inputs) + outputs)
1224
1248
  for prob_func in PROB_FUNCTIONS:
1225
1249
  if hasattr(self, prob_func):
1226
1250
  output_cols_prefix: str = f"{prob_func}_"
1227
1251
  output_column_names = self._get_output_column_names(output_cols_prefix)
1228
1252
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1229
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1253
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1254
+ ([] if self._drop_input_cols else inputs) + outputs)
1230
1255
 
1231
1256
  @property
1232
1257
  def model_signatures(self) -> Dict[str, ModelSignature]: