snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -28,6 +29,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
28
29
  from snowflake.snowpark import DataFrame, Session
29
30
  from snowflake.snowpark.functions import pandas_udf, sproc
30
31
  from snowflake.snowpark.types import PandasSeries
32
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
31
33
 
32
34
  from snowflake.ml.model.model_signature import (
33
35
  DataType,
@@ -290,7 +292,6 @@ class IterativeImputer(BaseTransformer):
290
292
  sample_weight_col: Optional[str] = None,
291
293
  ) -> None:
292
294
  super().__init__()
293
- self.id = str(uuid4()).replace("-", "_").upper()
294
295
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
295
296
  deps = deps | _gather_dependencies(estimator)
296
297
  self._deps = list(deps)
@@ -324,6 +325,15 @@ class IterativeImputer(BaseTransformer):
324
325
  self.set_drop_input_cols(drop_input_cols)
325
326
  self.set_sample_weight_col(sample_weight_col)
326
327
 
328
+ def _get_rand_id(self) -> str:
329
+ """
330
+ Generate random id to be used in sproc and stage names.
331
+
332
+ Returns:
333
+ Random id string usable in sproc, table, and stage names.
334
+ """
335
+ return str(uuid4()).replace("-", "_").upper()
336
+
327
337
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
328
338
  """
329
339
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -402,7 +412,7 @@ class IterativeImputer(BaseTransformer):
402
412
  cp.dump(self._sklearn_object, local_transform_file)
403
413
 
404
414
  # Create temp stage to run fit.
405
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
415
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
406
416
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
407
417
  SqlResultValidator(
408
418
  session=session,
@@ -415,11 +425,12 @@ class IterativeImputer(BaseTransformer):
415
425
  expected_value=f"Stage area {transform_stage_name} successfully created."
416
426
  ).validate()
417
427
 
418
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
428
+ # Use posixpath to construct stage paths
429
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
430
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
419
431
  local_result_file_name = get_temp_file_path()
420
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
421
432
 
422
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
433
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
423
434
  statement_params = telemetry.get_function_usage_statement_params(
424
435
  project=_PROJECT,
425
436
  subproject=_SUBPROJECT,
@@ -445,6 +456,7 @@ class IterativeImputer(BaseTransformer):
445
456
  replace=True,
446
457
  session=session,
447
458
  statement_params=statement_params,
459
+ anonymous=True
448
460
  )
449
461
  def fit_wrapper_sproc(
450
462
  session: Session,
@@ -453,7 +465,8 @@ class IterativeImputer(BaseTransformer):
453
465
  stage_result_file_name: str,
454
466
  input_cols: List[str],
455
467
  label_cols: List[str],
456
- sample_weight_col: Optional[str]
468
+ sample_weight_col: Optional[str],
469
+ statement_params: Dict[str, str]
457
470
  ) -> str:
458
471
  import cloudpickle as cp
459
472
  import numpy as np
@@ -520,15 +533,15 @@ class IterativeImputer(BaseTransformer):
520
533
  api_calls=[Session.call],
521
534
  custom_tags=dict([("autogen", True)]),
522
535
  )
523
- sproc_export_file_name = session.call(
524
- fit_sproc_name,
536
+ sproc_export_file_name = fit_wrapper_sproc(
537
+ session,
525
538
  query,
526
539
  stage_transform_file_name,
527
540
  stage_result_file_name,
528
541
  identifier.get_unescaped_names(self.input_cols),
529
542
  identifier.get_unescaped_names(self.label_cols),
530
543
  identifier.get_unescaped_names(self.sample_weight_col),
531
- statement_params=statement_params,
544
+ statement_params,
532
545
  )
533
546
 
534
547
  if "|" in sproc_export_file_name:
@@ -538,7 +551,7 @@ class IterativeImputer(BaseTransformer):
538
551
  print("\n".join(fields[1:]))
539
552
 
540
553
  session.file.get(
541
- os.path.join(stage_result_file_name, sproc_export_file_name),
554
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
542
555
  local_result_file_name,
543
556
  statement_params=statement_params
544
557
  )
@@ -584,7 +597,7 @@ class IterativeImputer(BaseTransformer):
584
597
 
585
598
  # Register vectorized UDF for batch inference
586
599
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
587
- safe_id=self.id, method=inference_method)
600
+ safe_id=self._get_rand_id(), method=inference_method)
588
601
 
589
602
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
590
603
  # will try to pickle all of self which fails.
@@ -676,7 +689,7 @@ class IterativeImputer(BaseTransformer):
676
689
  return transformed_pandas_df.to_dict("records")
677
690
 
678
691
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
679
- safe_id=self.id
692
+ safe_id=self._get_rand_id()
680
693
  )
681
694
 
682
695
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -841,11 +854,18 @@ class IterativeImputer(BaseTransformer):
841
854
  Transformed dataset.
842
855
  """
843
856
  if isinstance(dataset, DataFrame):
857
+ expected_type_inferred = ""
858
+ # when it is classifier, infer the datatype from label columns
859
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
860
+ expected_type_inferred = convert_sp_to_sf_type(
861
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
862
+ )
863
+
844
864
  output_df = self._batch_inference(
845
865
  dataset=dataset,
846
866
  inference_method="predict",
847
867
  expected_output_cols_list=self.output_cols,
848
- expected_output_cols_type="",
868
+ expected_output_cols_type=expected_type_inferred,
849
869
  )
850
870
  elif isinstance(dataset, pd.DataFrame):
851
871
  output_df = self._sklearn_inference(
@@ -918,10 +938,10 @@ class IterativeImputer(BaseTransformer):
918
938
 
919
939
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
920
940
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
921
- Returns an empty list if current object is not a classifier or not yet fitted.
941
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
922
942
  """
923
943
  if getattr(self._sklearn_object, "classes_", None) is None:
924
- return []
944
+ return [output_cols_prefix]
925
945
 
926
946
  classes = self._sklearn_object.classes_
927
947
  if isinstance(classes, numpy.ndarray):
@@ -1146,7 +1166,7 @@ class IterativeImputer(BaseTransformer):
1146
1166
  cp.dump(self._sklearn_object, local_score_file)
1147
1167
 
1148
1168
  # Create temp stage to run score.
1149
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1169
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1150
1170
  session = dataset._session
1151
1171
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1152
1172
  SqlResultValidator(
@@ -1160,8 +1180,9 @@ class IterativeImputer(BaseTransformer):
1160
1180
  expected_value=f"Stage area {score_stage_name} successfully created."
1161
1181
  ).validate()
1162
1182
 
1163
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1164
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1183
+ # Use posixpath to construct stage paths
1184
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1185
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1165
1186
  statement_params = telemetry.get_function_usage_statement_params(
1166
1187
  project=_PROJECT,
1167
1188
  subproject=_SUBPROJECT,
@@ -1187,6 +1208,7 @@ class IterativeImputer(BaseTransformer):
1187
1208
  replace=True,
1188
1209
  session=session,
1189
1210
  statement_params=statement_params,
1211
+ anonymous=True
1190
1212
  )
1191
1213
  def score_wrapper_sproc(
1192
1214
  session: Session,
@@ -1194,7 +1216,8 @@ class IterativeImputer(BaseTransformer):
1194
1216
  stage_score_file_name: str,
1195
1217
  input_cols: List[str],
1196
1218
  label_cols: List[str],
1197
- sample_weight_col: Optional[str]
1219
+ sample_weight_col: Optional[str],
1220
+ statement_params: Dict[str, str]
1198
1221
  ) -> float:
1199
1222
  import cloudpickle as cp
1200
1223
  import numpy as np
@@ -1244,14 +1267,14 @@ class IterativeImputer(BaseTransformer):
1244
1267
  api_calls=[Session.call],
1245
1268
  custom_tags=dict([("autogen", True)]),
1246
1269
  )
1247
- score = session.call(
1248
- score_sproc_name,
1270
+ score = score_wrapper_sproc(
1271
+ session,
1249
1272
  query,
1250
1273
  stage_score_file_name,
1251
1274
  identifier.get_unescaped_names(self.input_cols),
1252
1275
  identifier.get_unescaped_names(self.label_cols),
1253
1276
  identifier.get_unescaped_names(self.sample_weight_col),
1254
- statement_params=statement_params,
1277
+ statement_params,
1255
1278
  )
1256
1279
 
1257
1280
  cleanup_temp_files([local_score_file_name])
@@ -1269,18 +1292,20 @@ class IterativeImputer(BaseTransformer):
1269
1292
  if self._sklearn_object._estimator_type == 'classifier':
1270
1293
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1271
1294
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1272
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1295
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1296
+ ([] if self._drop_input_cols else inputs) + outputs)
1273
1297
  # For regressor, the type of predict is float64
1274
1298
  elif self._sklearn_object._estimator_type == 'regressor':
1275
1299
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1276
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1277
-
1300
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1301
+ ([] if self._drop_input_cols else inputs) + outputs)
1278
1302
  for prob_func in PROB_FUNCTIONS:
1279
1303
  if hasattr(self, prob_func):
1280
1304
  output_cols_prefix: str = f"{prob_func}_"
1281
1305
  output_column_names = self._get_output_column_names(output_cols_prefix)
1282
1306
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1283
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1307
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1308
+ ([] if self._drop_input_cols else inputs) + outputs)
1284
1309
 
1285
1310
  @property
1286
1311
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -233,7 +235,6 @@ class KNNImputer(BaseTransformer):
233
235
  sample_weight_col: Optional[str] = None,
234
236
  ) -> None:
235
237
  super().__init__()
236
- self.id = str(uuid4()).replace("-", "_").upper()
237
238
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
238
239
 
239
240
  self._deps = list(deps)
@@ -259,6 +260,15 @@ class KNNImputer(BaseTransformer):
259
260
  self.set_drop_input_cols(drop_input_cols)
260
261
  self.set_sample_weight_col(sample_weight_col)
261
262
 
263
+ def _get_rand_id(self) -> str:
264
+ """
265
+ Generate random id to be used in sproc and stage names.
266
+
267
+ Returns:
268
+ Random id string usable in sproc, table, and stage names.
269
+ """
270
+ return str(uuid4()).replace("-", "_").upper()
271
+
262
272
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
263
273
  """
264
274
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -337,7 +347,7 @@ class KNNImputer(BaseTransformer):
337
347
  cp.dump(self._sklearn_object, local_transform_file)
338
348
 
339
349
  # Create temp stage to run fit.
340
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
350
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
341
351
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
342
352
  SqlResultValidator(
343
353
  session=session,
@@ -350,11 +360,12 @@ class KNNImputer(BaseTransformer):
350
360
  expected_value=f"Stage area {transform_stage_name} successfully created."
351
361
  ).validate()
352
362
 
353
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
363
+ # Use posixpath to construct stage paths
364
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
365
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
354
366
  local_result_file_name = get_temp_file_path()
355
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
356
367
 
357
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
368
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
358
369
  statement_params = telemetry.get_function_usage_statement_params(
359
370
  project=_PROJECT,
360
371
  subproject=_SUBPROJECT,
@@ -380,6 +391,7 @@ class KNNImputer(BaseTransformer):
380
391
  replace=True,
381
392
  session=session,
382
393
  statement_params=statement_params,
394
+ anonymous=True
383
395
  )
384
396
  def fit_wrapper_sproc(
385
397
  session: Session,
@@ -388,7 +400,8 @@ class KNNImputer(BaseTransformer):
388
400
  stage_result_file_name: str,
389
401
  input_cols: List[str],
390
402
  label_cols: List[str],
391
- sample_weight_col: Optional[str]
403
+ sample_weight_col: Optional[str],
404
+ statement_params: Dict[str, str]
392
405
  ) -> str:
393
406
  import cloudpickle as cp
394
407
  import numpy as np
@@ -455,15 +468,15 @@ class KNNImputer(BaseTransformer):
455
468
  api_calls=[Session.call],
456
469
  custom_tags=dict([("autogen", True)]),
457
470
  )
458
- sproc_export_file_name = session.call(
459
- fit_sproc_name,
471
+ sproc_export_file_name = fit_wrapper_sproc(
472
+ session,
460
473
  query,
461
474
  stage_transform_file_name,
462
475
  stage_result_file_name,
463
476
  identifier.get_unescaped_names(self.input_cols),
464
477
  identifier.get_unescaped_names(self.label_cols),
465
478
  identifier.get_unescaped_names(self.sample_weight_col),
466
- statement_params=statement_params,
479
+ statement_params,
467
480
  )
468
481
 
469
482
  if "|" in sproc_export_file_name:
@@ -473,7 +486,7 @@ class KNNImputer(BaseTransformer):
473
486
  print("\n".join(fields[1:]))
474
487
 
475
488
  session.file.get(
476
- os.path.join(stage_result_file_name, sproc_export_file_name),
489
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
477
490
  local_result_file_name,
478
491
  statement_params=statement_params
479
492
  )
@@ -519,7 +532,7 @@ class KNNImputer(BaseTransformer):
519
532
 
520
533
  # Register vectorized UDF for batch inference
521
534
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
522
- safe_id=self.id, method=inference_method)
535
+ safe_id=self._get_rand_id(), method=inference_method)
523
536
 
524
537
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
525
538
  # will try to pickle all of self which fails.
@@ -611,7 +624,7 @@ class KNNImputer(BaseTransformer):
611
624
  return transformed_pandas_df.to_dict("records")
612
625
 
613
626
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
614
- safe_id=self.id
627
+ safe_id=self._get_rand_id()
615
628
  )
616
629
 
617
630
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -776,11 +789,18 @@ class KNNImputer(BaseTransformer):
776
789
  Transformed dataset.
777
790
  """
778
791
  if isinstance(dataset, DataFrame):
792
+ expected_type_inferred = ""
793
+ # when it is classifier, infer the datatype from label columns
794
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
795
+ expected_type_inferred = convert_sp_to_sf_type(
796
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
797
+ )
798
+
779
799
  output_df = self._batch_inference(
780
800
  dataset=dataset,
781
801
  inference_method="predict",
782
802
  expected_output_cols_list=self.output_cols,
783
- expected_output_cols_type="",
803
+ expected_output_cols_type=expected_type_inferred,
784
804
  )
785
805
  elif isinstance(dataset, pd.DataFrame):
786
806
  output_df = self._sklearn_inference(
@@ -853,10 +873,10 @@ class KNNImputer(BaseTransformer):
853
873
 
854
874
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
855
875
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
856
- Returns an empty list if current object is not a classifier or not yet fitted.
876
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
857
877
  """
858
878
  if getattr(self._sklearn_object, "classes_", None) is None:
859
- return []
879
+ return [output_cols_prefix]
860
880
 
861
881
  classes = self._sklearn_object.classes_
862
882
  if isinstance(classes, numpy.ndarray):
@@ -1081,7 +1101,7 @@ class KNNImputer(BaseTransformer):
1081
1101
  cp.dump(self._sklearn_object, local_score_file)
1082
1102
 
1083
1103
  # Create temp stage to run score.
1084
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1104
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1085
1105
  session = dataset._session
1086
1106
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1087
1107
  SqlResultValidator(
@@ -1095,8 +1115,9 @@ class KNNImputer(BaseTransformer):
1095
1115
  expected_value=f"Stage area {score_stage_name} successfully created."
1096
1116
  ).validate()
1097
1117
 
1098
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1099
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1118
+ # Use posixpath to construct stage paths
1119
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1120
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1100
1121
  statement_params = telemetry.get_function_usage_statement_params(
1101
1122
  project=_PROJECT,
1102
1123
  subproject=_SUBPROJECT,
@@ -1122,6 +1143,7 @@ class KNNImputer(BaseTransformer):
1122
1143
  replace=True,
1123
1144
  session=session,
1124
1145
  statement_params=statement_params,
1146
+ anonymous=True
1125
1147
  )
1126
1148
  def score_wrapper_sproc(
1127
1149
  session: Session,
@@ -1129,7 +1151,8 @@ class KNNImputer(BaseTransformer):
1129
1151
  stage_score_file_name: str,
1130
1152
  input_cols: List[str],
1131
1153
  label_cols: List[str],
1132
- sample_weight_col: Optional[str]
1154
+ sample_weight_col: Optional[str],
1155
+ statement_params: Dict[str, str]
1133
1156
  ) -> float:
1134
1157
  import cloudpickle as cp
1135
1158
  import numpy as np
@@ -1179,14 +1202,14 @@ class KNNImputer(BaseTransformer):
1179
1202
  api_calls=[Session.call],
1180
1203
  custom_tags=dict([("autogen", True)]),
1181
1204
  )
1182
- score = session.call(
1183
- score_sproc_name,
1205
+ score = score_wrapper_sproc(
1206
+ session,
1184
1207
  query,
1185
1208
  stage_score_file_name,
1186
1209
  identifier.get_unescaped_names(self.input_cols),
1187
1210
  identifier.get_unescaped_names(self.label_cols),
1188
1211
  identifier.get_unescaped_names(self.sample_weight_col),
1189
- statement_params=statement_params,
1212
+ statement_params,
1190
1213
  )
1191
1214
 
1192
1215
  cleanup_temp_files([local_score_file_name])
@@ -1204,18 +1227,20 @@ class KNNImputer(BaseTransformer):
1204
1227
  if self._sklearn_object._estimator_type == 'classifier':
1205
1228
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1206
1229
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1207
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1230
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1231
+ ([] if self._drop_input_cols else inputs) + outputs)
1208
1232
  # For regressor, the type of predict is float64
1209
1233
  elif self._sklearn_object._estimator_type == 'regressor':
1210
1234
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1211
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1212
-
1235
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1236
+ ([] if self._drop_input_cols else inputs) + outputs)
1213
1237
  for prob_func in PROB_FUNCTIONS:
1214
1238
  if hasattr(self, prob_func):
1215
1239
  output_cols_prefix: str = f"{prob_func}_"
1216
1240
  output_column_names = self._get_output_column_names(output_cols_prefix)
1217
1241
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1218
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1242
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1243
+ ([] if self._drop_input_cols else inputs) + outputs)
1219
1244
 
1220
1245
  @property
1221
1246
  def model_signatures(self) -> Dict[str, ModelSignature]: