snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -197,7 +199,6 @@ class RBFSampler(BaseTransformer):
197
199
  sample_weight_col: Optional[str] = None,
198
200
  ) -> None:
199
201
  super().__init__()
200
- self.id = str(uuid4()).replace("-", "_").upper()
201
202
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
202
203
 
203
204
  self._deps = list(deps)
@@ -219,6 +220,15 @@ class RBFSampler(BaseTransformer):
219
220
  self.set_drop_input_cols(drop_input_cols)
220
221
  self.set_sample_weight_col(sample_weight_col)
221
222
 
223
+ def _get_rand_id(self) -> str:
224
+ """
225
+ Generate random id to be used in sproc and stage names.
226
+
227
+ Returns:
228
+ Random id string usable in sproc, table, and stage names.
229
+ """
230
+ return str(uuid4()).replace("-", "_").upper()
231
+
222
232
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
223
233
  """
224
234
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -297,7 +307,7 @@ class RBFSampler(BaseTransformer):
297
307
  cp.dump(self._sklearn_object, local_transform_file)
298
308
 
299
309
  # Create temp stage to run fit.
300
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
310
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
301
311
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
302
312
  SqlResultValidator(
303
313
  session=session,
@@ -310,11 +320,12 @@ class RBFSampler(BaseTransformer):
310
320
  expected_value=f"Stage area {transform_stage_name} successfully created."
311
321
  ).validate()
312
322
 
313
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
323
+ # Use posixpath to construct stage paths
324
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
325
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
314
326
  local_result_file_name = get_temp_file_path()
315
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
316
327
 
317
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
328
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
318
329
  statement_params = telemetry.get_function_usage_statement_params(
319
330
  project=_PROJECT,
320
331
  subproject=_SUBPROJECT,
@@ -340,6 +351,7 @@ class RBFSampler(BaseTransformer):
340
351
  replace=True,
341
352
  session=session,
342
353
  statement_params=statement_params,
354
+ anonymous=True
343
355
  )
344
356
  def fit_wrapper_sproc(
345
357
  session: Session,
@@ -348,7 +360,8 @@ class RBFSampler(BaseTransformer):
348
360
  stage_result_file_name: str,
349
361
  input_cols: List[str],
350
362
  label_cols: List[str],
351
- sample_weight_col: Optional[str]
363
+ sample_weight_col: Optional[str],
364
+ statement_params: Dict[str, str]
352
365
  ) -> str:
353
366
  import cloudpickle as cp
354
367
  import numpy as np
@@ -415,15 +428,15 @@ class RBFSampler(BaseTransformer):
415
428
  api_calls=[Session.call],
416
429
  custom_tags=dict([("autogen", True)]),
417
430
  )
418
- sproc_export_file_name = session.call(
419
- fit_sproc_name,
431
+ sproc_export_file_name = fit_wrapper_sproc(
432
+ session,
420
433
  query,
421
434
  stage_transform_file_name,
422
435
  stage_result_file_name,
423
436
  identifier.get_unescaped_names(self.input_cols),
424
437
  identifier.get_unescaped_names(self.label_cols),
425
438
  identifier.get_unescaped_names(self.sample_weight_col),
426
- statement_params=statement_params,
439
+ statement_params,
427
440
  )
428
441
 
429
442
  if "|" in sproc_export_file_name:
@@ -433,7 +446,7 @@ class RBFSampler(BaseTransformer):
433
446
  print("\n".join(fields[1:]))
434
447
 
435
448
  session.file.get(
436
- os.path.join(stage_result_file_name, sproc_export_file_name),
449
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
437
450
  local_result_file_name,
438
451
  statement_params=statement_params
439
452
  )
@@ -479,7 +492,7 @@ class RBFSampler(BaseTransformer):
479
492
 
480
493
  # Register vectorized UDF for batch inference
481
494
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
482
- safe_id=self.id, method=inference_method)
495
+ safe_id=self._get_rand_id(), method=inference_method)
483
496
 
484
497
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
485
498
  # will try to pickle all of self which fails.
@@ -571,7 +584,7 @@ class RBFSampler(BaseTransformer):
571
584
  return transformed_pandas_df.to_dict("records")
572
585
 
573
586
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
574
- safe_id=self.id
587
+ safe_id=self._get_rand_id()
575
588
  )
576
589
 
577
590
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -736,11 +749,18 @@ class RBFSampler(BaseTransformer):
736
749
  Transformed dataset.
737
750
  """
738
751
  if isinstance(dataset, DataFrame):
752
+ expected_type_inferred = ""
753
+ # when it is classifier, infer the datatype from label columns
754
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
755
+ expected_type_inferred = convert_sp_to_sf_type(
756
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
757
+ )
758
+
739
759
  output_df = self._batch_inference(
740
760
  dataset=dataset,
741
761
  inference_method="predict",
742
762
  expected_output_cols_list=self.output_cols,
743
- expected_output_cols_type="",
763
+ expected_output_cols_type=expected_type_inferred,
744
764
  )
745
765
  elif isinstance(dataset, pd.DataFrame):
746
766
  output_df = self._sklearn_inference(
@@ -813,10 +833,10 @@ class RBFSampler(BaseTransformer):
813
833
 
814
834
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
815
835
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
816
- Returns an empty list if current object is not a classifier or not yet fitted.
836
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
817
837
  """
818
838
  if getattr(self._sklearn_object, "classes_", None) is None:
819
- return []
839
+ return [output_cols_prefix]
820
840
 
821
841
  classes = self._sklearn_object.classes_
822
842
  if isinstance(classes, numpy.ndarray):
@@ -1041,7 +1061,7 @@ class RBFSampler(BaseTransformer):
1041
1061
  cp.dump(self._sklearn_object, local_score_file)
1042
1062
 
1043
1063
  # Create temp stage to run score.
1044
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1064
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1045
1065
  session = dataset._session
1046
1066
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1047
1067
  SqlResultValidator(
@@ -1055,8 +1075,9 @@ class RBFSampler(BaseTransformer):
1055
1075
  expected_value=f"Stage area {score_stage_name} successfully created."
1056
1076
  ).validate()
1057
1077
 
1058
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1059
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1078
+ # Use posixpath to construct stage paths
1079
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1080
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1060
1081
  statement_params = telemetry.get_function_usage_statement_params(
1061
1082
  project=_PROJECT,
1062
1083
  subproject=_SUBPROJECT,
@@ -1082,6 +1103,7 @@ class RBFSampler(BaseTransformer):
1082
1103
  replace=True,
1083
1104
  session=session,
1084
1105
  statement_params=statement_params,
1106
+ anonymous=True
1085
1107
  )
1086
1108
  def score_wrapper_sproc(
1087
1109
  session: Session,
@@ -1089,7 +1111,8 @@ class RBFSampler(BaseTransformer):
1089
1111
  stage_score_file_name: str,
1090
1112
  input_cols: List[str],
1091
1113
  label_cols: List[str],
1092
- sample_weight_col: Optional[str]
1114
+ sample_weight_col: Optional[str],
1115
+ statement_params: Dict[str, str]
1093
1116
  ) -> float:
1094
1117
  import cloudpickle as cp
1095
1118
  import numpy as np
@@ -1139,14 +1162,14 @@ class RBFSampler(BaseTransformer):
1139
1162
  api_calls=[Session.call],
1140
1163
  custom_tags=dict([("autogen", True)]),
1141
1164
  )
1142
- score = session.call(
1143
- score_sproc_name,
1165
+ score = score_wrapper_sproc(
1166
+ session,
1144
1167
  query,
1145
1168
  stage_score_file_name,
1146
1169
  identifier.get_unescaped_names(self.input_cols),
1147
1170
  identifier.get_unescaped_names(self.label_cols),
1148
1171
  identifier.get_unescaped_names(self.sample_weight_col),
1149
- statement_params=statement_params,
1172
+ statement_params,
1150
1173
  )
1151
1174
 
1152
1175
  cleanup_temp_files([local_score_file_name])
@@ -1164,18 +1187,20 @@ class RBFSampler(BaseTransformer):
1164
1187
  if self._sklearn_object._estimator_type == 'classifier':
1165
1188
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1166
1189
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1167
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1190
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1191
+ ([] if self._drop_input_cols else inputs) + outputs)
1168
1192
  # For regressor, the type of predict is float64
1169
1193
  elif self._sklearn_object._estimator_type == 'regressor':
1170
1194
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1171
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1172
-
1195
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1196
+ ([] if self._drop_input_cols else inputs) + outputs)
1173
1197
  for prob_func in PROB_FUNCTIONS:
1174
1198
  if hasattr(self, prob_func):
1175
1199
  output_cols_prefix: str = f"{prob_func}_"
1176
1200
  output_column_names = self._get_output_column_names(output_cols_prefix)
1177
1201
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1178
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1202
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1203
+ ([] if self._drop_input_cols else inputs) + outputs)
1179
1204
 
1180
1205
  @property
1181
1206
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -195,7 +197,6 @@ class SkewedChi2Sampler(BaseTransformer):
195
197
  sample_weight_col: Optional[str] = None,
196
198
  ) -> None:
197
199
  super().__init__()
198
- self.id = str(uuid4()).replace("-", "_").upper()
199
200
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
200
201
 
201
202
  self._deps = list(deps)
@@ -217,6 +218,15 @@ class SkewedChi2Sampler(BaseTransformer):
217
218
  self.set_drop_input_cols(drop_input_cols)
218
219
  self.set_sample_weight_col(sample_weight_col)
219
220
 
221
+ def _get_rand_id(self) -> str:
222
+ """
223
+ Generate random id to be used in sproc and stage names.
224
+
225
+ Returns:
226
+ Random id string usable in sproc, table, and stage names.
227
+ """
228
+ return str(uuid4()).replace("-", "_").upper()
229
+
220
230
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
221
231
  """
222
232
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -295,7 +305,7 @@ class SkewedChi2Sampler(BaseTransformer):
295
305
  cp.dump(self._sklearn_object, local_transform_file)
296
306
 
297
307
  # Create temp stage to run fit.
298
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
308
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
299
309
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
300
310
  SqlResultValidator(
301
311
  session=session,
@@ -308,11 +318,12 @@ class SkewedChi2Sampler(BaseTransformer):
308
318
  expected_value=f"Stage area {transform_stage_name} successfully created."
309
319
  ).validate()
310
320
 
311
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
321
+ # Use posixpath to construct stage paths
322
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
323
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
312
324
  local_result_file_name = get_temp_file_path()
313
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
314
325
 
315
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
326
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
316
327
  statement_params = telemetry.get_function_usage_statement_params(
317
328
  project=_PROJECT,
318
329
  subproject=_SUBPROJECT,
@@ -338,6 +349,7 @@ class SkewedChi2Sampler(BaseTransformer):
338
349
  replace=True,
339
350
  session=session,
340
351
  statement_params=statement_params,
352
+ anonymous=True
341
353
  )
342
354
  def fit_wrapper_sproc(
343
355
  session: Session,
@@ -346,7 +358,8 @@ class SkewedChi2Sampler(BaseTransformer):
346
358
  stage_result_file_name: str,
347
359
  input_cols: List[str],
348
360
  label_cols: List[str],
349
- sample_weight_col: Optional[str]
361
+ sample_weight_col: Optional[str],
362
+ statement_params: Dict[str, str]
350
363
  ) -> str:
351
364
  import cloudpickle as cp
352
365
  import numpy as np
@@ -413,15 +426,15 @@ class SkewedChi2Sampler(BaseTransformer):
413
426
  api_calls=[Session.call],
414
427
  custom_tags=dict([("autogen", True)]),
415
428
  )
416
- sproc_export_file_name = session.call(
417
- fit_sproc_name,
429
+ sproc_export_file_name = fit_wrapper_sproc(
430
+ session,
418
431
  query,
419
432
  stage_transform_file_name,
420
433
  stage_result_file_name,
421
434
  identifier.get_unescaped_names(self.input_cols),
422
435
  identifier.get_unescaped_names(self.label_cols),
423
436
  identifier.get_unescaped_names(self.sample_weight_col),
424
- statement_params=statement_params,
437
+ statement_params,
425
438
  )
426
439
 
427
440
  if "|" in sproc_export_file_name:
@@ -431,7 +444,7 @@ class SkewedChi2Sampler(BaseTransformer):
431
444
  print("\n".join(fields[1:]))
432
445
 
433
446
  session.file.get(
434
- os.path.join(stage_result_file_name, sproc_export_file_name),
447
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
435
448
  local_result_file_name,
436
449
  statement_params=statement_params
437
450
  )
@@ -477,7 +490,7 @@ class SkewedChi2Sampler(BaseTransformer):
477
490
 
478
491
  # Register vectorized UDF for batch inference
479
492
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
480
- safe_id=self.id, method=inference_method)
493
+ safe_id=self._get_rand_id(), method=inference_method)
481
494
 
482
495
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
483
496
  # will try to pickle all of self which fails.
@@ -569,7 +582,7 @@ class SkewedChi2Sampler(BaseTransformer):
569
582
  return transformed_pandas_df.to_dict("records")
570
583
 
571
584
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
572
- safe_id=self.id
585
+ safe_id=self._get_rand_id()
573
586
  )
574
587
 
575
588
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -734,11 +747,18 @@ class SkewedChi2Sampler(BaseTransformer):
734
747
  Transformed dataset.
735
748
  """
736
749
  if isinstance(dataset, DataFrame):
750
+ expected_type_inferred = ""
751
+ # when it is classifier, infer the datatype from label columns
752
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
753
+ expected_type_inferred = convert_sp_to_sf_type(
754
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
755
+ )
756
+
737
757
  output_df = self._batch_inference(
738
758
  dataset=dataset,
739
759
  inference_method="predict",
740
760
  expected_output_cols_list=self.output_cols,
741
- expected_output_cols_type="",
761
+ expected_output_cols_type=expected_type_inferred,
742
762
  )
743
763
  elif isinstance(dataset, pd.DataFrame):
744
764
  output_df = self._sklearn_inference(
@@ -811,10 +831,10 @@ class SkewedChi2Sampler(BaseTransformer):
811
831
 
812
832
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
813
833
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
814
- Returns an empty list if current object is not a classifier or not yet fitted.
834
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
815
835
  """
816
836
  if getattr(self._sklearn_object, "classes_", None) is None:
817
- return []
837
+ return [output_cols_prefix]
818
838
 
819
839
  classes = self._sklearn_object.classes_
820
840
  if isinstance(classes, numpy.ndarray):
@@ -1039,7 +1059,7 @@ class SkewedChi2Sampler(BaseTransformer):
1039
1059
  cp.dump(self._sklearn_object, local_score_file)
1040
1060
 
1041
1061
  # Create temp stage to run score.
1042
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1062
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1043
1063
  session = dataset._session
1044
1064
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1045
1065
  SqlResultValidator(
@@ -1053,8 +1073,9 @@ class SkewedChi2Sampler(BaseTransformer):
1053
1073
  expected_value=f"Stage area {score_stage_name} successfully created."
1054
1074
  ).validate()
1055
1075
 
1056
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1057
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1076
+ # Use posixpath to construct stage paths
1077
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1078
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1058
1079
  statement_params = telemetry.get_function_usage_statement_params(
1059
1080
  project=_PROJECT,
1060
1081
  subproject=_SUBPROJECT,
@@ -1080,6 +1101,7 @@ class SkewedChi2Sampler(BaseTransformer):
1080
1101
  replace=True,
1081
1102
  session=session,
1082
1103
  statement_params=statement_params,
1104
+ anonymous=True
1083
1105
  )
1084
1106
  def score_wrapper_sproc(
1085
1107
  session: Session,
@@ -1087,7 +1109,8 @@ class SkewedChi2Sampler(BaseTransformer):
1087
1109
  stage_score_file_name: str,
1088
1110
  input_cols: List[str],
1089
1111
  label_cols: List[str],
1090
- sample_weight_col: Optional[str]
1112
+ sample_weight_col: Optional[str],
1113
+ statement_params: Dict[str, str]
1091
1114
  ) -> float:
1092
1115
  import cloudpickle as cp
1093
1116
  import numpy as np
@@ -1137,14 +1160,14 @@ class SkewedChi2Sampler(BaseTransformer):
1137
1160
  api_calls=[Session.call],
1138
1161
  custom_tags=dict([("autogen", True)]),
1139
1162
  )
1140
- score = session.call(
1141
- score_sproc_name,
1163
+ score = score_wrapper_sproc(
1164
+ session,
1142
1165
  query,
1143
1166
  stage_score_file_name,
1144
1167
  identifier.get_unescaped_names(self.input_cols),
1145
1168
  identifier.get_unescaped_names(self.label_cols),
1146
1169
  identifier.get_unescaped_names(self.sample_weight_col),
1147
- statement_params=statement_params,
1170
+ statement_params,
1148
1171
  )
1149
1172
 
1150
1173
  cleanup_temp_files([local_score_file_name])
@@ -1162,18 +1185,20 @@ class SkewedChi2Sampler(BaseTransformer):
1162
1185
  if self._sklearn_object._estimator_type == 'classifier':
1163
1186
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1164
1187
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1165
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1188
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1189
+ ([] if self._drop_input_cols else inputs) + outputs)
1166
1190
  # For regressor, the type of predict is float64
1167
1191
  elif self._sklearn_object._estimator_type == 'regressor':
1168
1192
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1169
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1170
-
1193
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1194
+ ([] if self._drop_input_cols else inputs) + outputs)
1171
1195
  for prob_func in PROB_FUNCTIONS:
1172
1196
  if hasattr(self, prob_func):
1173
1197
  output_cols_prefix: str = f"{prob_func}_"
1174
1198
  output_column_names = self._get_output_column_names(output_cols_prefix)
1175
1199
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1176
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1200
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1201
+ ([] if self._drop_input_cols else inputs) + outputs)
1177
1202
 
1178
1203
  @property
1179
1204
  def model_signatures(self) -> Dict[str, ModelSignature]: