snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -314,7 +316,6 @@ class GridSearchCV(BaseTransformer):
314
316
  sample_weight_col: Optional[str] = None,
315
317
  ) -> None:
316
318
  super().__init__()
317
- self.id = str(uuid4()).replace("-", "_").upper()
318
319
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
319
320
  deps = deps | _gather_dependencies(estimator)
320
321
  self._deps = list(deps)
@@ -343,6 +344,15 @@ class GridSearchCV(BaseTransformer):
343
344
  self.set_drop_input_cols(drop_input_cols)
344
345
  self.set_sample_weight_col(sample_weight_col)
345
346
 
347
+ def _get_rand_id(self) -> str:
348
+ """
349
+ Generate random id to be used in sproc and stage names.
350
+
351
+ Returns:
352
+ Random id string usable in sproc, table, and stage names.
353
+ """
354
+ return str(uuid4()).replace("-", "_").upper()
355
+
346
356
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
347
357
  """
348
358
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -421,7 +431,7 @@ class GridSearchCV(BaseTransformer):
421
431
  cp.dump(self._sklearn_object, local_transform_file)
422
432
 
423
433
  # Create temp stage to run fit.
424
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
434
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
425
435
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
426
436
  SqlResultValidator(
427
437
  session=session,
@@ -434,11 +444,12 @@ class GridSearchCV(BaseTransformer):
434
444
  expected_value=f"Stage area {transform_stage_name} successfully created."
435
445
  ).validate()
436
446
 
437
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
447
+ # Use posixpath to construct stage paths
448
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
449
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
438
450
  local_result_file_name = get_temp_file_path()
439
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
440
451
 
441
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
452
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
442
453
  statement_params = telemetry.get_function_usage_statement_params(
443
454
  project=_PROJECT,
444
455
  subproject=_SUBPROJECT,
@@ -464,6 +475,7 @@ class GridSearchCV(BaseTransformer):
464
475
  replace=True,
465
476
  session=session,
466
477
  statement_params=statement_params,
478
+ anonymous=True
467
479
  )
468
480
  def fit_wrapper_sproc(
469
481
  session: Session,
@@ -472,7 +484,8 @@ class GridSearchCV(BaseTransformer):
472
484
  stage_result_file_name: str,
473
485
  input_cols: List[str],
474
486
  label_cols: List[str],
475
- sample_weight_col: Optional[str]
487
+ sample_weight_col: Optional[str],
488
+ statement_params: Dict[str, str]
476
489
  ) -> str:
477
490
  import cloudpickle as cp
478
491
  import numpy as np
@@ -539,15 +552,15 @@ class GridSearchCV(BaseTransformer):
539
552
  api_calls=[Session.call],
540
553
  custom_tags=dict([("autogen", True)]),
541
554
  )
542
- sproc_export_file_name = session.call(
543
- fit_sproc_name,
555
+ sproc_export_file_name = fit_wrapper_sproc(
556
+ session,
544
557
  query,
545
558
  stage_transform_file_name,
546
559
  stage_result_file_name,
547
560
  identifier.get_unescaped_names(self.input_cols),
548
561
  identifier.get_unescaped_names(self.label_cols),
549
562
  identifier.get_unescaped_names(self.sample_weight_col),
550
- statement_params=statement_params,
563
+ statement_params,
551
564
  )
552
565
 
553
566
  if "|" in sproc_export_file_name:
@@ -557,7 +570,7 @@ class GridSearchCV(BaseTransformer):
557
570
  print("\n".join(fields[1:]))
558
571
 
559
572
  session.file.get(
560
- os.path.join(stage_result_file_name, sproc_export_file_name),
573
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
561
574
  local_result_file_name,
562
575
  statement_params=statement_params
563
576
  )
@@ -603,7 +616,7 @@ class GridSearchCV(BaseTransformer):
603
616
 
604
617
  # Register vectorized UDF for batch inference
605
618
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
606
- safe_id=self.id, method=inference_method)
619
+ safe_id=self._get_rand_id(), method=inference_method)
607
620
 
608
621
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
609
622
  # will try to pickle all of self which fails.
@@ -695,7 +708,7 @@ class GridSearchCV(BaseTransformer):
695
708
  return transformed_pandas_df.to_dict("records")
696
709
 
697
710
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
698
- safe_id=self.id
711
+ safe_id=self._get_rand_id()
699
712
  )
700
713
 
701
714
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -862,11 +875,18 @@ class GridSearchCV(BaseTransformer):
862
875
  Transformed dataset.
863
876
  """
864
877
  if isinstance(dataset, DataFrame):
878
+ expected_type_inferred = ""
879
+ # when it is classifier, infer the datatype from label columns
880
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
881
+ expected_type_inferred = convert_sp_to_sf_type(
882
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
883
+ )
884
+
865
885
  output_df = self._batch_inference(
866
886
  dataset=dataset,
867
887
  inference_method="predict",
868
888
  expected_output_cols_list=self.output_cols,
869
- expected_output_cols_type="",
889
+ expected_output_cols_type=expected_type_inferred,
870
890
  )
871
891
  elif isinstance(dataset, pd.DataFrame):
872
892
  output_df = self._sklearn_inference(
@@ -939,10 +959,10 @@ class GridSearchCV(BaseTransformer):
939
959
 
940
960
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
941
961
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
942
- Returns an empty list if current object is not a classifier or not yet fitted.
962
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
943
963
  """
944
964
  if getattr(self._sklearn_object, "classes_", None) is None:
945
- return []
965
+ return [output_cols_prefix]
946
966
 
947
967
  classes = self._sklearn_object.classes_
948
968
  if isinstance(classes, numpy.ndarray):
@@ -1173,7 +1193,7 @@ class GridSearchCV(BaseTransformer):
1173
1193
  cp.dump(self._sklearn_object, local_score_file)
1174
1194
 
1175
1195
  # Create temp stage to run score.
1176
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1196
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1177
1197
  session = dataset._session
1178
1198
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1179
1199
  SqlResultValidator(
@@ -1187,8 +1207,9 @@ class GridSearchCV(BaseTransformer):
1187
1207
  expected_value=f"Stage area {score_stage_name} successfully created."
1188
1208
  ).validate()
1189
1209
 
1190
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1191
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1210
+ # Use posixpath to construct stage paths
1211
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1212
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1192
1213
  statement_params = telemetry.get_function_usage_statement_params(
1193
1214
  project=_PROJECT,
1194
1215
  subproject=_SUBPROJECT,
@@ -1214,6 +1235,7 @@ class GridSearchCV(BaseTransformer):
1214
1235
  replace=True,
1215
1236
  session=session,
1216
1237
  statement_params=statement_params,
1238
+ anonymous=True
1217
1239
  )
1218
1240
  def score_wrapper_sproc(
1219
1241
  session: Session,
@@ -1221,7 +1243,8 @@ class GridSearchCV(BaseTransformer):
1221
1243
  stage_score_file_name: str,
1222
1244
  input_cols: List[str],
1223
1245
  label_cols: List[str],
1224
- sample_weight_col: Optional[str]
1246
+ sample_weight_col: Optional[str],
1247
+ statement_params: Dict[str, str]
1225
1248
  ) -> float:
1226
1249
  import cloudpickle as cp
1227
1250
  import numpy as np
@@ -1271,14 +1294,14 @@ class GridSearchCV(BaseTransformer):
1271
1294
  api_calls=[Session.call],
1272
1295
  custom_tags=dict([("autogen", True)]),
1273
1296
  )
1274
- score = session.call(
1275
- score_sproc_name,
1297
+ score = score_wrapper_sproc(
1298
+ session,
1276
1299
  query,
1277
1300
  stage_score_file_name,
1278
1301
  identifier.get_unescaped_names(self.input_cols),
1279
1302
  identifier.get_unescaped_names(self.label_cols),
1280
1303
  identifier.get_unescaped_names(self.sample_weight_col),
1281
- statement_params=statement_params,
1304
+ statement_params,
1282
1305
  )
1283
1306
 
1284
1307
  cleanup_temp_files([local_score_file_name])
@@ -1296,18 +1319,20 @@ class GridSearchCV(BaseTransformer):
1296
1319
  if self._sklearn_object._estimator_type == 'classifier':
1297
1320
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1298
1321
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1299
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1322
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1323
+ ([] if self._drop_input_cols else inputs) + outputs)
1300
1324
  # For regressor, the type of predict is float64
1301
1325
  elif self._sklearn_object._estimator_type == 'regressor':
1302
1326
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1303
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1304
-
1327
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1328
+ ([] if self._drop_input_cols else inputs) + outputs)
1305
1329
  for prob_func in PROB_FUNCTIONS:
1306
1330
  if hasattr(self, prob_func):
1307
1331
  output_cols_prefix: str = f"{prob_func}_"
1308
1332
  output_column_names = self._get_output_column_names(output_cols_prefix)
1309
1333
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1310
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1334
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1335
+ ([] if self._drop_input_cols else inputs) + outputs)
1311
1336
 
1312
1337
  @property
1313
1338
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -327,7 +329,6 @@ class RandomizedSearchCV(BaseTransformer):
327
329
  sample_weight_col: Optional[str] = None,
328
330
  ) -> None:
329
331
  super().__init__()
330
- self.id = str(uuid4()).replace("-", "_").upper()
331
332
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
332
333
  deps = deps | _gather_dependencies(estimator)
333
334
  self._deps = list(deps)
@@ -358,6 +359,15 @@ class RandomizedSearchCV(BaseTransformer):
358
359
  self.set_drop_input_cols(drop_input_cols)
359
360
  self.set_sample_weight_col(sample_weight_col)
360
361
 
362
+ def _get_rand_id(self) -> str:
363
+ """
364
+ Generate random id to be used in sproc and stage names.
365
+
366
+ Returns:
367
+ Random id string usable in sproc, table, and stage names.
368
+ """
369
+ return str(uuid4()).replace("-", "_").upper()
370
+
361
371
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
362
372
  """
363
373
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -436,7 +446,7 @@ class RandomizedSearchCV(BaseTransformer):
436
446
  cp.dump(self._sklearn_object, local_transform_file)
437
447
 
438
448
  # Create temp stage to run fit.
439
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
449
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
440
450
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
441
451
  SqlResultValidator(
442
452
  session=session,
@@ -449,11 +459,12 @@ class RandomizedSearchCV(BaseTransformer):
449
459
  expected_value=f"Stage area {transform_stage_name} successfully created."
450
460
  ).validate()
451
461
 
452
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
462
+ # Use posixpath to construct stage paths
463
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
464
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
453
465
  local_result_file_name = get_temp_file_path()
454
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
455
466
 
456
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
467
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
457
468
  statement_params = telemetry.get_function_usage_statement_params(
458
469
  project=_PROJECT,
459
470
  subproject=_SUBPROJECT,
@@ -479,6 +490,7 @@ class RandomizedSearchCV(BaseTransformer):
479
490
  replace=True,
480
491
  session=session,
481
492
  statement_params=statement_params,
493
+ anonymous=True
482
494
  )
483
495
  def fit_wrapper_sproc(
484
496
  session: Session,
@@ -487,7 +499,8 @@ class RandomizedSearchCV(BaseTransformer):
487
499
  stage_result_file_name: str,
488
500
  input_cols: List[str],
489
501
  label_cols: List[str],
490
- sample_weight_col: Optional[str]
502
+ sample_weight_col: Optional[str],
503
+ statement_params: Dict[str, str]
491
504
  ) -> str:
492
505
  import cloudpickle as cp
493
506
  import numpy as np
@@ -554,15 +567,15 @@ class RandomizedSearchCV(BaseTransformer):
554
567
  api_calls=[Session.call],
555
568
  custom_tags=dict([("autogen", True)]),
556
569
  )
557
- sproc_export_file_name = session.call(
558
- fit_sproc_name,
570
+ sproc_export_file_name = fit_wrapper_sproc(
571
+ session,
559
572
  query,
560
573
  stage_transform_file_name,
561
574
  stage_result_file_name,
562
575
  identifier.get_unescaped_names(self.input_cols),
563
576
  identifier.get_unescaped_names(self.label_cols),
564
577
  identifier.get_unescaped_names(self.sample_weight_col),
565
- statement_params=statement_params,
578
+ statement_params,
566
579
  )
567
580
 
568
581
  if "|" in sproc_export_file_name:
@@ -572,7 +585,7 @@ class RandomizedSearchCV(BaseTransformer):
572
585
  print("\n".join(fields[1:]))
573
586
 
574
587
  session.file.get(
575
- os.path.join(stage_result_file_name, sproc_export_file_name),
588
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
576
589
  local_result_file_name,
577
590
  statement_params=statement_params
578
591
  )
@@ -618,7 +631,7 @@ class RandomizedSearchCV(BaseTransformer):
618
631
 
619
632
  # Register vectorized UDF for batch inference
620
633
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
621
- safe_id=self.id, method=inference_method)
634
+ safe_id=self._get_rand_id(), method=inference_method)
622
635
 
623
636
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
624
637
  # will try to pickle all of self which fails.
@@ -710,7 +723,7 @@ class RandomizedSearchCV(BaseTransformer):
710
723
  return transformed_pandas_df.to_dict("records")
711
724
 
712
725
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
713
- safe_id=self.id
726
+ safe_id=self._get_rand_id()
714
727
  )
715
728
 
716
729
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -877,11 +890,18 @@ class RandomizedSearchCV(BaseTransformer):
877
890
  Transformed dataset.
878
891
  """
879
892
  if isinstance(dataset, DataFrame):
893
+ expected_type_inferred = ""
894
+ # when it is classifier, infer the datatype from label columns
895
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
896
+ expected_type_inferred = convert_sp_to_sf_type(
897
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
898
+ )
899
+
880
900
  output_df = self._batch_inference(
881
901
  dataset=dataset,
882
902
  inference_method="predict",
883
903
  expected_output_cols_list=self.output_cols,
884
- expected_output_cols_type="",
904
+ expected_output_cols_type=expected_type_inferred,
885
905
  )
886
906
  elif isinstance(dataset, pd.DataFrame):
887
907
  output_df = self._sklearn_inference(
@@ -954,10 +974,10 @@ class RandomizedSearchCV(BaseTransformer):
954
974
 
955
975
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
956
976
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
957
- Returns an empty list if current object is not a classifier or not yet fitted.
977
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
958
978
  """
959
979
  if getattr(self._sklearn_object, "classes_", None) is None:
960
- return []
980
+ return [output_cols_prefix]
961
981
 
962
982
  classes = self._sklearn_object.classes_
963
983
  if isinstance(classes, numpy.ndarray):
@@ -1188,7 +1208,7 @@ class RandomizedSearchCV(BaseTransformer):
1188
1208
  cp.dump(self._sklearn_object, local_score_file)
1189
1209
 
1190
1210
  # Create temp stage to run score.
1191
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1211
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1192
1212
  session = dataset._session
1193
1213
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1194
1214
  SqlResultValidator(
@@ -1202,8 +1222,9 @@ class RandomizedSearchCV(BaseTransformer):
1202
1222
  expected_value=f"Stage area {score_stage_name} successfully created."
1203
1223
  ).validate()
1204
1224
 
1205
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1206
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1225
+ # Use posixpath to construct stage paths
1226
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1227
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1207
1228
  statement_params = telemetry.get_function_usage_statement_params(
1208
1229
  project=_PROJECT,
1209
1230
  subproject=_SUBPROJECT,
@@ -1229,6 +1250,7 @@ class RandomizedSearchCV(BaseTransformer):
1229
1250
  replace=True,
1230
1251
  session=session,
1231
1252
  statement_params=statement_params,
1253
+ anonymous=True
1232
1254
  )
1233
1255
  def score_wrapper_sproc(
1234
1256
  session: Session,
@@ -1236,7 +1258,8 @@ class RandomizedSearchCV(BaseTransformer):
1236
1258
  stage_score_file_name: str,
1237
1259
  input_cols: List[str],
1238
1260
  label_cols: List[str],
1239
- sample_weight_col: Optional[str]
1261
+ sample_weight_col: Optional[str],
1262
+ statement_params: Dict[str, str]
1240
1263
  ) -> float:
1241
1264
  import cloudpickle as cp
1242
1265
  import numpy as np
@@ -1286,14 +1309,14 @@ class RandomizedSearchCV(BaseTransformer):
1286
1309
  api_calls=[Session.call],
1287
1310
  custom_tags=dict([("autogen", True)]),
1288
1311
  )
1289
- score = session.call(
1290
- score_sproc_name,
1312
+ score = score_wrapper_sproc(
1313
+ session,
1291
1314
  query,
1292
1315
  stage_score_file_name,
1293
1316
  identifier.get_unescaped_names(self.input_cols),
1294
1317
  identifier.get_unescaped_names(self.label_cols),
1295
1318
  identifier.get_unescaped_names(self.sample_weight_col),
1296
- statement_params=statement_params,
1319
+ statement_params,
1297
1320
  )
1298
1321
 
1299
1322
  cleanup_temp_files([local_score_file_name])
@@ -1311,18 +1334,20 @@ class RandomizedSearchCV(BaseTransformer):
1311
1334
  if self._sklearn_object._estimator_type == 'classifier':
1312
1335
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1313
1336
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1314
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1337
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1338
+ ([] if self._drop_input_cols else inputs) + outputs)
1315
1339
  # For regressor, the type of predict is float64
1316
1340
  elif self._sklearn_object._estimator_type == 'regressor':
1317
1341
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1318
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1319
-
1342
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1343
+ ([] if self._drop_input_cols else inputs) + outputs)
1320
1344
  for prob_func in PROB_FUNCTIONS:
1321
1345
  if hasattr(self, prob_func):
1322
1346
  output_cols_prefix: str = f"{prob_func}_"
1323
1347
  output_column_names = self._get_output_column_names(output_cols_prefix)
1324
1348
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1325
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1349
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1350
+ ([] if self._drop_input_cols else inputs) + outputs)
1326
1351
 
1327
1352
  @property
1328
1353
  def model_signatures(self) -> Dict[str, ModelSignature]: