snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -253,7 +255,6 @@ class KNeighborsClassifier(BaseTransformer):
253
255
  sample_weight_col: Optional[str] = None,
254
256
  ) -> None:
255
257
  super().__init__()
256
- self.id = str(uuid4()).replace("-", "_").upper()
257
258
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
258
259
 
259
260
  self._deps = list(deps)
@@ -280,6 +281,15 @@ class KNeighborsClassifier(BaseTransformer):
280
281
  self.set_drop_input_cols(drop_input_cols)
281
282
  self.set_sample_weight_col(sample_weight_col)
282
283
 
284
+ def _get_rand_id(self) -> str:
285
+ """
286
+ Generate random id to be used in sproc and stage names.
287
+
288
+ Returns:
289
+ Random id string usable in sproc, table, and stage names.
290
+ """
291
+ return str(uuid4()).replace("-", "_").upper()
292
+
283
293
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
284
294
  """
285
295
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -358,7 +368,7 @@ class KNeighborsClassifier(BaseTransformer):
358
368
  cp.dump(self._sklearn_object, local_transform_file)
359
369
 
360
370
  # Create temp stage to run fit.
361
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
371
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
362
372
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
363
373
  SqlResultValidator(
364
374
  session=session,
@@ -371,11 +381,12 @@ class KNeighborsClassifier(BaseTransformer):
371
381
  expected_value=f"Stage area {transform_stage_name} successfully created."
372
382
  ).validate()
373
383
 
374
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
384
+ # Use posixpath to construct stage paths
385
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
386
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
375
387
  local_result_file_name = get_temp_file_path()
376
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
377
388
 
378
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
389
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
379
390
  statement_params = telemetry.get_function_usage_statement_params(
380
391
  project=_PROJECT,
381
392
  subproject=_SUBPROJECT,
@@ -401,6 +412,7 @@ class KNeighborsClassifier(BaseTransformer):
401
412
  replace=True,
402
413
  session=session,
403
414
  statement_params=statement_params,
415
+ anonymous=True
404
416
  )
405
417
  def fit_wrapper_sproc(
406
418
  session: Session,
@@ -409,7 +421,8 @@ class KNeighborsClassifier(BaseTransformer):
409
421
  stage_result_file_name: str,
410
422
  input_cols: List[str],
411
423
  label_cols: List[str],
412
- sample_weight_col: Optional[str]
424
+ sample_weight_col: Optional[str],
425
+ statement_params: Dict[str, str]
413
426
  ) -> str:
414
427
  import cloudpickle as cp
415
428
  import numpy as np
@@ -476,15 +489,15 @@ class KNeighborsClassifier(BaseTransformer):
476
489
  api_calls=[Session.call],
477
490
  custom_tags=dict([("autogen", True)]),
478
491
  )
479
- sproc_export_file_name = session.call(
480
- fit_sproc_name,
492
+ sproc_export_file_name = fit_wrapper_sproc(
493
+ session,
481
494
  query,
482
495
  stage_transform_file_name,
483
496
  stage_result_file_name,
484
497
  identifier.get_unescaped_names(self.input_cols),
485
498
  identifier.get_unescaped_names(self.label_cols),
486
499
  identifier.get_unescaped_names(self.sample_weight_col),
487
- statement_params=statement_params,
500
+ statement_params,
488
501
  )
489
502
 
490
503
  if "|" in sproc_export_file_name:
@@ -494,7 +507,7 @@ class KNeighborsClassifier(BaseTransformer):
494
507
  print("\n".join(fields[1:]))
495
508
 
496
509
  session.file.get(
497
- os.path.join(stage_result_file_name, sproc_export_file_name),
510
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
498
511
  local_result_file_name,
499
512
  statement_params=statement_params
500
513
  )
@@ -540,7 +553,7 @@ class KNeighborsClassifier(BaseTransformer):
540
553
 
541
554
  # Register vectorized UDF for batch inference
542
555
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
543
- safe_id=self.id, method=inference_method)
556
+ safe_id=self._get_rand_id(), method=inference_method)
544
557
 
545
558
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
546
559
  # will try to pickle all of self which fails.
@@ -632,7 +645,7 @@ class KNeighborsClassifier(BaseTransformer):
632
645
  return transformed_pandas_df.to_dict("records")
633
646
 
634
647
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
635
- safe_id=self.id
648
+ safe_id=self._get_rand_id()
636
649
  )
637
650
 
638
651
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -799,11 +812,18 @@ class KNeighborsClassifier(BaseTransformer):
799
812
  Transformed dataset.
800
813
  """
801
814
  if isinstance(dataset, DataFrame):
815
+ expected_type_inferred = ""
816
+ # when it is classifier, infer the datatype from label columns
817
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
818
+ expected_type_inferred = convert_sp_to_sf_type(
819
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
820
+ )
821
+
802
822
  output_df = self._batch_inference(
803
823
  dataset=dataset,
804
824
  inference_method="predict",
805
825
  expected_output_cols_list=self.output_cols,
806
- expected_output_cols_type="",
826
+ expected_output_cols_type=expected_type_inferred,
807
827
  )
808
828
  elif isinstance(dataset, pd.DataFrame):
809
829
  output_df = self._sklearn_inference(
@@ -874,10 +894,10 @@ class KNeighborsClassifier(BaseTransformer):
874
894
 
875
895
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
876
896
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
877
- Returns an empty list if current object is not a classifier or not yet fitted.
897
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
878
898
  """
879
899
  if getattr(self._sklearn_object, "classes_", None) is None:
880
- return []
900
+ return [output_cols_prefix]
881
901
 
882
902
  classes = self._sklearn_object.classes_
883
903
  if isinstance(classes, numpy.ndarray):
@@ -1106,7 +1126,7 @@ class KNeighborsClassifier(BaseTransformer):
1106
1126
  cp.dump(self._sklearn_object, local_score_file)
1107
1127
 
1108
1128
  # Create temp stage to run score.
1109
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1129
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1110
1130
  session = dataset._session
1111
1131
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1112
1132
  SqlResultValidator(
@@ -1120,8 +1140,9 @@ class KNeighborsClassifier(BaseTransformer):
1120
1140
  expected_value=f"Stage area {score_stage_name} successfully created."
1121
1141
  ).validate()
1122
1142
 
1123
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1124
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1143
+ # Use posixpath to construct stage paths
1144
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1145
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1125
1146
  statement_params = telemetry.get_function_usage_statement_params(
1126
1147
  project=_PROJECT,
1127
1148
  subproject=_SUBPROJECT,
@@ -1147,6 +1168,7 @@ class KNeighborsClassifier(BaseTransformer):
1147
1168
  replace=True,
1148
1169
  session=session,
1149
1170
  statement_params=statement_params,
1171
+ anonymous=True
1150
1172
  )
1151
1173
  def score_wrapper_sproc(
1152
1174
  session: Session,
@@ -1154,7 +1176,8 @@ class KNeighborsClassifier(BaseTransformer):
1154
1176
  stage_score_file_name: str,
1155
1177
  input_cols: List[str],
1156
1178
  label_cols: List[str],
1157
- sample_weight_col: Optional[str]
1179
+ sample_weight_col: Optional[str],
1180
+ statement_params: Dict[str, str]
1158
1181
  ) -> float:
1159
1182
  import cloudpickle as cp
1160
1183
  import numpy as np
@@ -1204,14 +1227,14 @@ class KNeighborsClassifier(BaseTransformer):
1204
1227
  api_calls=[Session.call],
1205
1228
  custom_tags=dict([("autogen", True)]),
1206
1229
  )
1207
- score = session.call(
1208
- score_sproc_name,
1230
+ score = score_wrapper_sproc(
1231
+ session,
1209
1232
  query,
1210
1233
  stage_score_file_name,
1211
1234
  identifier.get_unescaped_names(self.input_cols),
1212
1235
  identifier.get_unescaped_names(self.label_cols),
1213
1236
  identifier.get_unescaped_names(self.sample_weight_col),
1214
- statement_params=statement_params,
1237
+ statement_params,
1215
1238
  )
1216
1239
 
1217
1240
  cleanup_temp_files([local_score_file_name])
@@ -1229,18 +1252,20 @@ class KNeighborsClassifier(BaseTransformer):
1229
1252
  if self._sklearn_object._estimator_type == 'classifier':
1230
1253
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1231
1254
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1232
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1255
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1256
+ ([] if self._drop_input_cols else inputs) + outputs)
1233
1257
  # For regressor, the type of predict is float64
1234
1258
  elif self._sklearn_object._estimator_type == 'regressor':
1235
1259
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1236
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1237
-
1260
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1261
+ ([] if self._drop_input_cols else inputs) + outputs)
1238
1262
  for prob_func in PROB_FUNCTIONS:
1239
1263
  if hasattr(self, prob_func):
1240
1264
  output_cols_prefix: str = f"{prob_func}_"
1241
1265
  output_column_names = self._get_output_column_names(output_cols_prefix)
1242
1266
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1243
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1267
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1268
+ ([] if self._drop_input_cols else inputs) + outputs)
1244
1269
 
1245
1270
  @property
1246
1271
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -255,7 +257,6 @@ class KNeighborsRegressor(BaseTransformer):
255
257
  sample_weight_col: Optional[str] = None,
256
258
  ) -> None:
257
259
  super().__init__()
258
- self.id = str(uuid4()).replace("-", "_").upper()
259
260
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
260
261
 
261
262
  self._deps = list(deps)
@@ -282,6 +283,15 @@ class KNeighborsRegressor(BaseTransformer):
282
283
  self.set_drop_input_cols(drop_input_cols)
283
284
  self.set_sample_weight_col(sample_weight_col)
284
285
 
286
+ def _get_rand_id(self) -> str:
287
+ """
288
+ Generate random id to be used in sproc and stage names.
289
+
290
+ Returns:
291
+ Random id string usable in sproc, table, and stage names.
292
+ """
293
+ return str(uuid4()).replace("-", "_").upper()
294
+
285
295
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
286
296
  """
287
297
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -360,7 +370,7 @@ class KNeighborsRegressor(BaseTransformer):
360
370
  cp.dump(self._sklearn_object, local_transform_file)
361
371
 
362
372
  # Create temp stage to run fit.
363
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
373
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
364
374
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
365
375
  SqlResultValidator(
366
376
  session=session,
@@ -373,11 +383,12 @@ class KNeighborsRegressor(BaseTransformer):
373
383
  expected_value=f"Stage area {transform_stage_name} successfully created."
374
384
  ).validate()
375
385
 
376
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
386
+ # Use posixpath to construct stage paths
387
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
388
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
377
389
  local_result_file_name = get_temp_file_path()
378
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
379
390
 
380
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
391
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
381
392
  statement_params = telemetry.get_function_usage_statement_params(
382
393
  project=_PROJECT,
383
394
  subproject=_SUBPROJECT,
@@ -403,6 +414,7 @@ class KNeighborsRegressor(BaseTransformer):
403
414
  replace=True,
404
415
  session=session,
405
416
  statement_params=statement_params,
417
+ anonymous=True
406
418
  )
407
419
  def fit_wrapper_sproc(
408
420
  session: Session,
@@ -411,7 +423,8 @@ class KNeighborsRegressor(BaseTransformer):
411
423
  stage_result_file_name: str,
412
424
  input_cols: List[str],
413
425
  label_cols: List[str],
414
- sample_weight_col: Optional[str]
426
+ sample_weight_col: Optional[str],
427
+ statement_params: Dict[str, str]
415
428
  ) -> str:
416
429
  import cloudpickle as cp
417
430
  import numpy as np
@@ -478,15 +491,15 @@ class KNeighborsRegressor(BaseTransformer):
478
491
  api_calls=[Session.call],
479
492
  custom_tags=dict([("autogen", True)]),
480
493
  )
481
- sproc_export_file_name = session.call(
482
- fit_sproc_name,
494
+ sproc_export_file_name = fit_wrapper_sproc(
495
+ session,
483
496
  query,
484
497
  stage_transform_file_name,
485
498
  stage_result_file_name,
486
499
  identifier.get_unescaped_names(self.input_cols),
487
500
  identifier.get_unescaped_names(self.label_cols),
488
501
  identifier.get_unescaped_names(self.sample_weight_col),
489
- statement_params=statement_params,
502
+ statement_params,
490
503
  )
491
504
 
492
505
  if "|" in sproc_export_file_name:
@@ -496,7 +509,7 @@ class KNeighborsRegressor(BaseTransformer):
496
509
  print("\n".join(fields[1:]))
497
510
 
498
511
  session.file.get(
499
- os.path.join(stage_result_file_name, sproc_export_file_name),
512
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
500
513
  local_result_file_name,
501
514
  statement_params=statement_params
502
515
  )
@@ -542,7 +555,7 @@ class KNeighborsRegressor(BaseTransformer):
542
555
 
543
556
  # Register vectorized UDF for batch inference
544
557
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
545
- safe_id=self.id, method=inference_method)
558
+ safe_id=self._get_rand_id(), method=inference_method)
546
559
 
547
560
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
548
561
  # will try to pickle all of self which fails.
@@ -634,7 +647,7 @@ class KNeighborsRegressor(BaseTransformer):
634
647
  return transformed_pandas_df.to_dict("records")
635
648
 
636
649
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
637
- safe_id=self.id
650
+ safe_id=self._get_rand_id()
638
651
  )
639
652
 
640
653
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -801,11 +814,18 @@ class KNeighborsRegressor(BaseTransformer):
801
814
  Transformed dataset.
802
815
  """
803
816
  if isinstance(dataset, DataFrame):
817
+ expected_type_inferred = "float"
818
+ # when it is classifier, infer the datatype from label columns
819
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
820
+ expected_type_inferred = convert_sp_to_sf_type(
821
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
822
+ )
823
+
804
824
  output_df = self._batch_inference(
805
825
  dataset=dataset,
806
826
  inference_method="predict",
807
827
  expected_output_cols_list=self.output_cols,
808
- expected_output_cols_type="float",
828
+ expected_output_cols_type=expected_type_inferred,
809
829
  )
810
830
  elif isinstance(dataset, pd.DataFrame):
811
831
  output_df = self._sklearn_inference(
@@ -876,10 +896,10 @@ class KNeighborsRegressor(BaseTransformer):
876
896
 
877
897
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
878
898
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
879
- Returns an empty list if current object is not a classifier or not yet fitted.
899
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
880
900
  """
881
901
  if getattr(self._sklearn_object, "classes_", None) is None:
882
- return []
902
+ return [output_cols_prefix]
883
903
 
884
904
  classes = self._sklearn_object.classes_
885
905
  if isinstance(classes, numpy.ndarray):
@@ -1104,7 +1124,7 @@ class KNeighborsRegressor(BaseTransformer):
1104
1124
  cp.dump(self._sklearn_object, local_score_file)
1105
1125
 
1106
1126
  # Create temp stage to run score.
1107
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1127
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1108
1128
  session = dataset._session
1109
1129
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1110
1130
  SqlResultValidator(
@@ -1118,8 +1138,9 @@ class KNeighborsRegressor(BaseTransformer):
1118
1138
  expected_value=f"Stage area {score_stage_name} successfully created."
1119
1139
  ).validate()
1120
1140
 
1121
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1122
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1141
+ # Use posixpath to construct stage paths
1142
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1143
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1123
1144
  statement_params = telemetry.get_function_usage_statement_params(
1124
1145
  project=_PROJECT,
1125
1146
  subproject=_SUBPROJECT,
@@ -1145,6 +1166,7 @@ class KNeighborsRegressor(BaseTransformer):
1145
1166
  replace=True,
1146
1167
  session=session,
1147
1168
  statement_params=statement_params,
1169
+ anonymous=True
1148
1170
  )
1149
1171
  def score_wrapper_sproc(
1150
1172
  session: Session,
@@ -1152,7 +1174,8 @@ class KNeighborsRegressor(BaseTransformer):
1152
1174
  stage_score_file_name: str,
1153
1175
  input_cols: List[str],
1154
1176
  label_cols: List[str],
1155
- sample_weight_col: Optional[str]
1177
+ sample_weight_col: Optional[str],
1178
+ statement_params: Dict[str, str]
1156
1179
  ) -> float:
1157
1180
  import cloudpickle as cp
1158
1181
  import numpy as np
@@ -1202,14 +1225,14 @@ class KNeighborsRegressor(BaseTransformer):
1202
1225
  api_calls=[Session.call],
1203
1226
  custom_tags=dict([("autogen", True)]),
1204
1227
  )
1205
- score = session.call(
1206
- score_sproc_name,
1228
+ score = score_wrapper_sproc(
1229
+ session,
1207
1230
  query,
1208
1231
  stage_score_file_name,
1209
1232
  identifier.get_unescaped_names(self.input_cols),
1210
1233
  identifier.get_unescaped_names(self.label_cols),
1211
1234
  identifier.get_unescaped_names(self.sample_weight_col),
1212
- statement_params=statement_params,
1235
+ statement_params,
1213
1236
  )
1214
1237
 
1215
1238
  cleanup_temp_files([local_score_file_name])
@@ -1227,18 +1250,20 @@ class KNeighborsRegressor(BaseTransformer):
1227
1250
  if self._sklearn_object._estimator_type == 'classifier':
1228
1251
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1229
1252
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1230
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1253
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1254
+ ([] if self._drop_input_cols else inputs) + outputs)
1231
1255
  # For regressor, the type of predict is float64
1232
1256
  elif self._sklearn_object._estimator_type == 'regressor':
1233
1257
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1234
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1235
-
1258
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1259
+ ([] if self._drop_input_cols else inputs) + outputs)
1236
1260
  for prob_func in PROB_FUNCTIONS:
1237
1261
  if hasattr(self, prob_func):
1238
1262
  output_cols_prefix: str = f"{prob_func}_"
1239
1263
  output_column_names = self._get_output_column_names(output_cols_prefix)
1240
1264
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1241
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1265
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1266
+ ([] if self._drop_input_cols else inputs) + outputs)
1242
1267
 
1243
1268
  @property
1244
1269
  def model_signatures(self) -> Dict[str, ModelSignature]: