snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/covariance/oas.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -190,7 +192,6 @@ class OAS(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -211,6 +212,15 @@ class OAS(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
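
Note on the hunk above: 1.0.2 minted a single UUID per estimator instance (self.id) and reused it in every stage, sproc, UDF, and table name, while 1.0.3's _get_rand_id() mints a fresh id at each call site, so repeated fit()/predict() calls on one object no longer reuse the same temporary object names. A minimal stdlib sketch of what the helper produces (example value illustrative):

from uuid import uuid4

rand_id = str(uuid4()).replace("-", "_").upper()
stage_name = f"SNOWML_TRANSFORM_{rand_id}"
# e.g. SNOWML_TRANSFORM_1B9C6E2F_4A31_4F0E_9D2D_7C2B1A0E5F43; hyphens are
# swapped for underscores so the id is safe in unquoted Snowflake identifiers.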
@@ -289,7 +299,7 @@ class OAS(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -302,11 +312,12 @@ class OAS(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
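
Note on the stage-path change above: os.path.join uses the local OS separator, so on Windows it builds backslash-separated paths that Snowflake stage operations do not accept; posixpath.join always emits forward slashes. A quick stdlib illustration (the stage and file names are illustrative):

import posixpath

posixpath.join("SNOWML_TRANSFORM_ABC", "model.pkl")
# -> 'SNOWML_TRANSFORM_ABC/model.pkl' on every platform, whereas
# os.path.join would yield 'SNOWML_TRANSFORM_ABC\\model.pkl' on Windows.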
@@ -332,6 +343,7 @@ class OAS(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -340,7 +352,8 @@ class OAS(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -407,15 +420,15 @@ class OAS(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -425,7 +438,7 @@ class OAS(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -471,7 +484,7 @@ class OAS(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -563,7 +576,7 @@ class OAS(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -728,11 +741,18 @@ class OAS(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
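
Note on the predict() change above: 1.0.2 always passed an empty expected_output_cols_type, leaving the output column type unspecified; 1.0.3 derives a concrete Snowflake type from the stored 'predict' signature when one is available. convert_sp_to_sf_type is a Snowpark-internal helper that maps a Snowpark DataType to its SQL type name; a hedged sketch of the mapping it performs (exact strings depend on the Snowpark version):

from snowflake.snowpark.types import DoubleType, LongType
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

convert_sp_to_sf_type(DoubleType())  # typically 'DOUBLE'
convert_sp_to_sf_type(LongType())    # typically 'BIGINT'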
@@ -803,10 +823,10 @@ class OAS(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
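
Note on _get_output_column_names: returning [output_cols_prefix] instead of [] means an estimator without a classes_ attribute still yields one named output column for predict_proba()-style signatures rather than an empty output list. A sketch of the updated behavior; the per-class naming in the classifier branch is assumed from the truncated hunk:

from typing import List, Optional, Sequence

def output_column_names(classes: Optional[Sequence], output_cols_prefix: str) -> List[str]:
    if classes is None:                # not a classifier / not yet fitted
        return [output_cols_prefix]    # was: return []
    return [f"{output_cols_prefix}{c}" for c in classes]

output_column_names(None, "predict_proba_")    # ['predict_proba_']
output_column_names([0, 1], "predict_proba_")  # ['predict_proba_0', 'predict_proba_1']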
@@ -1031,7 +1051,7 @@ class OAS(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1045,8 +1065,9 @@ class OAS(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1072,6 +1093,7 @@ class OAS(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1079,7 +1101,8 @@ class OAS(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1129,14 +1152,14 @@ class OAS(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1154,18 +1177,20 @@ class OAS(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
            outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
            outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
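
Note on the signature change above: unless drop_input_cols is set, batch inference passes the input columns through to the output DataFrame, so each ModelSignature's outputs now declare the inputs followed by the model outputs. The rule in isolation (column names illustrative):

drop_input_cols = False
inputs = ["SEPAL_LENGTH", "SEPAL_WIDTH"]
outputs = ["OUTPUT_PREDICT"]

signature_outputs = ([] if drop_input_cols else inputs) + outputs
# drop_input_cols=False -> ['SEPAL_LENGTH', 'SEPAL_WIDTH', 'OUTPUT_PREDICT']
# drop_input_cols=True  -> ['OUTPUT_PREDICT']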
snowflake/ml/modeling/covariance/shrunk_covariance.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -195,7 +197,6 @@ class ShrunkCovariance(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -217,6 +218,15 @@ class ShrunkCovariance(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -295,7 +305,7 @@ class ShrunkCovariance(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -308,11 +318,12 @@ class ShrunkCovariance(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -338,6 +349,7 @@ class ShrunkCovariance(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -346,7 +358,8 @@ class ShrunkCovariance(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -413,15 +426,15 @@ class ShrunkCovariance(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -431,7 +444,7 @@ class ShrunkCovariance(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -477,7 +490,7 @@ class ShrunkCovariance(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -569,7 +582,7 @@ class ShrunkCovariance(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -734,11 +747,18 @@ class ShrunkCovariance(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -809,10 +829,10 @@ class ShrunkCovariance(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1037,7 +1057,7 @@ class ShrunkCovariance(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1051,8 +1071,9 @@ class ShrunkCovariance(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1078,6 +1099,7 @@ class ShrunkCovariance(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1085,7 +1107,8 @@ class ShrunkCovariance(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1135,14 +1158,14 @@ class ShrunkCovariance(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1160,18 +1183,20 @@ class ShrunkCovariance(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: