snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -260,7 +262,6 @@ class MiniBatchSparsePCA(BaseTransformer):
260
262
  sample_weight_col: Optional[str] = None,
261
263
  ) -> None:
262
264
  super().__init__()
263
- self.id = str(uuid4()).replace("-", "_").upper()
264
265
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
265
266
 
266
267
  self._deps = list(deps)
@@ -293,6 +294,15 @@ class MiniBatchSparsePCA(BaseTransformer):
293
294
  self.set_drop_input_cols(drop_input_cols)
294
295
  self.set_sample_weight_col(sample_weight_col)
295
296
 
297
+ def _get_rand_id(self) -> str:
298
+ """
299
+ Generate random id to be used in sproc and stage names.
300
+
301
+ Returns:
302
+ Random id string usable in sproc, table, and stage names.
303
+ """
304
+ return str(uuid4()).replace("-", "_").upper()
305
+
296
306
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
297
307
  """
298
308
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -371,7 +381,7 @@ class MiniBatchSparsePCA(BaseTransformer):
371
381
  cp.dump(self._sklearn_object, local_transform_file)
372
382
 
373
383
  # Create temp stage to run fit.
374
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
384
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
375
385
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
376
386
  SqlResultValidator(
377
387
  session=session,
@@ -384,11 +394,12 @@ class MiniBatchSparsePCA(BaseTransformer):
384
394
  expected_value=f"Stage area {transform_stage_name} successfully created."
385
395
  ).validate()
386
396
 
387
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
397
+ # Use posixpath to construct stage paths
398
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
399
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
388
400
  local_result_file_name = get_temp_file_path()
389
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
390
401
 
391
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
402
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
392
403
  statement_params = telemetry.get_function_usage_statement_params(
393
404
  project=_PROJECT,
394
405
  subproject=_SUBPROJECT,
@@ -414,6 +425,7 @@ class MiniBatchSparsePCA(BaseTransformer):
414
425
  replace=True,
415
426
  session=session,
416
427
  statement_params=statement_params,
428
+ anonymous=True
417
429
  )
418
430
  def fit_wrapper_sproc(
419
431
  session: Session,
@@ -422,7 +434,8 @@ class MiniBatchSparsePCA(BaseTransformer):
422
434
  stage_result_file_name: str,
423
435
  input_cols: List[str],
424
436
  label_cols: List[str],
425
- sample_weight_col: Optional[str]
437
+ sample_weight_col: Optional[str],
438
+ statement_params: Dict[str, str]
426
439
  ) -> str:
427
440
  import cloudpickle as cp
428
441
  import numpy as np
@@ -489,15 +502,15 @@ class MiniBatchSparsePCA(BaseTransformer):
489
502
  api_calls=[Session.call],
490
503
  custom_tags=dict([("autogen", True)]),
491
504
  )
492
- sproc_export_file_name = session.call(
493
- fit_sproc_name,
505
+ sproc_export_file_name = fit_wrapper_sproc(
506
+ session,
494
507
  query,
495
508
  stage_transform_file_name,
496
509
  stage_result_file_name,
497
510
  identifier.get_unescaped_names(self.input_cols),
498
511
  identifier.get_unescaped_names(self.label_cols),
499
512
  identifier.get_unescaped_names(self.sample_weight_col),
500
- statement_params=statement_params,
513
+ statement_params,
501
514
  )
502
515
 
503
516
  if "|" in sproc_export_file_name:
@@ -507,7 +520,7 @@ class MiniBatchSparsePCA(BaseTransformer):
507
520
  print("\n".join(fields[1:]))
508
521
 
509
522
  session.file.get(
510
- os.path.join(stage_result_file_name, sproc_export_file_name),
523
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
511
524
  local_result_file_name,
512
525
  statement_params=statement_params
513
526
  )
@@ -553,7 +566,7 @@ class MiniBatchSparsePCA(BaseTransformer):
553
566
 
554
567
  # Register vectorized UDF for batch inference
555
568
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
556
- safe_id=self.id, method=inference_method)
569
+ safe_id=self._get_rand_id(), method=inference_method)
557
570
 
558
571
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
559
572
  # will try to pickle all of self which fails.
@@ -645,7 +658,7 @@ class MiniBatchSparsePCA(BaseTransformer):
645
658
  return transformed_pandas_df.to_dict("records")
646
659
 
647
660
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
648
- safe_id=self.id
661
+ safe_id=self._get_rand_id()
649
662
  )
650
663
 
651
664
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -810,11 +823,18 @@ class MiniBatchSparsePCA(BaseTransformer):
810
823
  Transformed dataset.
811
824
  """
812
825
  if isinstance(dataset, DataFrame):
826
+ expected_type_inferred = ""
827
+ # when it is classifier, infer the datatype from label columns
828
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
829
+ expected_type_inferred = convert_sp_to_sf_type(
830
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
831
+ )
832
+
813
833
  output_df = self._batch_inference(
814
834
  dataset=dataset,
815
835
  inference_method="predict",
816
836
  expected_output_cols_list=self.output_cols,
817
- expected_output_cols_type="",
837
+ expected_output_cols_type=expected_type_inferred,
818
838
  )
819
839
  elif isinstance(dataset, pd.DataFrame):
820
840
  output_df = self._sklearn_inference(
@@ -887,10 +907,10 @@ class MiniBatchSparsePCA(BaseTransformer):
887
907
 
888
908
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
889
909
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
890
- Returns an empty list if current object is not a classifier or not yet fitted.
910
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
891
911
  """
892
912
  if getattr(self._sklearn_object, "classes_", None) is None:
893
- return []
913
+ return [output_cols_prefix]
894
914
 
895
915
  classes = self._sklearn_object.classes_
896
916
  if isinstance(classes, numpy.ndarray):
@@ -1115,7 +1135,7 @@ class MiniBatchSparsePCA(BaseTransformer):
1115
1135
  cp.dump(self._sklearn_object, local_score_file)
1116
1136
 
1117
1137
  # Create temp stage to run score.
1118
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1138
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1119
1139
  session = dataset._session
1120
1140
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1121
1141
  SqlResultValidator(
@@ -1129,8 +1149,9 @@ class MiniBatchSparsePCA(BaseTransformer):
1129
1149
  expected_value=f"Stage area {score_stage_name} successfully created."
1130
1150
  ).validate()
1131
1151
 
1132
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1133
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1152
+ # Use posixpath to construct stage paths
1153
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1154
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1134
1155
  statement_params = telemetry.get_function_usage_statement_params(
1135
1156
  project=_PROJECT,
1136
1157
  subproject=_SUBPROJECT,
@@ -1156,6 +1177,7 @@ class MiniBatchSparsePCA(BaseTransformer):
1156
1177
  replace=True,
1157
1178
  session=session,
1158
1179
  statement_params=statement_params,
1180
+ anonymous=True
1159
1181
  )
1160
1182
  def score_wrapper_sproc(
1161
1183
  session: Session,
@@ -1163,7 +1185,8 @@ class MiniBatchSparsePCA(BaseTransformer):
1163
1185
  stage_score_file_name: str,
1164
1186
  input_cols: List[str],
1165
1187
  label_cols: List[str],
1166
- sample_weight_col: Optional[str]
1188
+ sample_weight_col: Optional[str],
1189
+ statement_params: Dict[str, str]
1167
1190
  ) -> float:
1168
1191
  import cloudpickle as cp
1169
1192
  import numpy as np
@@ -1213,14 +1236,14 @@ class MiniBatchSparsePCA(BaseTransformer):
1213
1236
  api_calls=[Session.call],
1214
1237
  custom_tags=dict([("autogen", True)]),
1215
1238
  )
1216
- score = session.call(
1217
- score_sproc_name,
1239
+ score = score_wrapper_sproc(
1240
+ session,
1218
1241
  query,
1219
1242
  stage_score_file_name,
1220
1243
  identifier.get_unescaped_names(self.input_cols),
1221
1244
  identifier.get_unescaped_names(self.label_cols),
1222
1245
  identifier.get_unescaped_names(self.sample_weight_col),
1223
- statement_params=statement_params,
1246
+ statement_params,
1224
1247
  )
1225
1248
 
1226
1249
  cleanup_temp_files([local_score_file_name])
@@ -1238,18 +1261,20 @@ class MiniBatchSparsePCA(BaseTransformer):
1238
1261
  if self._sklearn_object._estimator_type == 'classifier':
1239
1262
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1240
1263
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1241
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1264
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1265
+ ([] if self._drop_input_cols else inputs) + outputs)
1242
1266
  # For regressor, the type of predict is float64
1243
1267
  elif self._sklearn_object._estimator_type == 'regressor':
1244
1268
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1245
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1246
-
1269
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1270
+ ([] if self._drop_input_cols else inputs) + outputs)
1247
1271
  for prob_func in PROB_FUNCTIONS:
1248
1272
  if hasattr(self, prob_func):
1249
1273
  output_cols_prefix: str = f"{prob_func}_"
1250
1274
  output_column_names = self._get_output_column_names(output_cols_prefix)
1251
1275
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1252
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1276
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1277
+ ([] if self._drop_input_cols else inputs) + outputs)
1253
1278
 
1254
1279
  @property
1255
1280
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -267,7 +269,6 @@ class PCA(BaseTransformer):
267
269
  sample_weight_col: Optional[str] = None,
268
270
  ) -> None:
269
271
  super().__init__()
270
- self.id = str(uuid4()).replace("-", "_").upper()
271
272
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
272
273
 
273
274
  self._deps = list(deps)
@@ -295,6 +296,15 @@ class PCA(BaseTransformer):
295
296
  self.set_drop_input_cols(drop_input_cols)
296
297
  self.set_sample_weight_col(sample_weight_col)
297
298
 
299
+ def _get_rand_id(self) -> str:
300
+ """
301
+ Generate random id to be used in sproc and stage names.
302
+
303
+ Returns:
304
+ Random id string usable in sproc, table, and stage names.
305
+ """
306
+ return str(uuid4()).replace("-", "_").upper()
307
+
298
308
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
299
309
  """
300
310
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -373,7 +383,7 @@ class PCA(BaseTransformer):
373
383
  cp.dump(self._sklearn_object, local_transform_file)
374
384
 
375
385
  # Create temp stage to run fit.
376
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
386
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
377
387
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
378
388
  SqlResultValidator(
379
389
  session=session,
@@ -386,11 +396,12 @@ class PCA(BaseTransformer):
386
396
  expected_value=f"Stage area {transform_stage_name} successfully created."
387
397
  ).validate()
388
398
 
389
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
399
+ # Use posixpath to construct stage paths
400
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
401
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
390
402
  local_result_file_name = get_temp_file_path()
391
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
392
403
 
393
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
404
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
394
405
  statement_params = telemetry.get_function_usage_statement_params(
395
406
  project=_PROJECT,
396
407
  subproject=_SUBPROJECT,
@@ -416,6 +427,7 @@ class PCA(BaseTransformer):
416
427
  replace=True,
417
428
  session=session,
418
429
  statement_params=statement_params,
430
+ anonymous=True
419
431
  )
420
432
  def fit_wrapper_sproc(
421
433
  session: Session,
@@ -424,7 +436,8 @@ class PCA(BaseTransformer):
424
436
  stage_result_file_name: str,
425
437
  input_cols: List[str],
426
438
  label_cols: List[str],
427
- sample_weight_col: Optional[str]
439
+ sample_weight_col: Optional[str],
440
+ statement_params: Dict[str, str]
428
441
  ) -> str:
429
442
  import cloudpickle as cp
430
443
  import numpy as np
@@ -491,15 +504,15 @@ class PCA(BaseTransformer):
491
504
  api_calls=[Session.call],
492
505
  custom_tags=dict([("autogen", True)]),
493
506
  )
494
- sproc_export_file_name = session.call(
495
- fit_sproc_name,
507
+ sproc_export_file_name = fit_wrapper_sproc(
508
+ session,
496
509
  query,
497
510
  stage_transform_file_name,
498
511
  stage_result_file_name,
499
512
  identifier.get_unescaped_names(self.input_cols),
500
513
  identifier.get_unescaped_names(self.label_cols),
501
514
  identifier.get_unescaped_names(self.sample_weight_col),
502
- statement_params=statement_params,
515
+ statement_params,
503
516
  )
504
517
 
505
518
  if "|" in sproc_export_file_name:
@@ -509,7 +522,7 @@ class PCA(BaseTransformer):
509
522
  print("\n".join(fields[1:]))
510
523
 
511
524
  session.file.get(
512
- os.path.join(stage_result_file_name, sproc_export_file_name),
525
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
513
526
  local_result_file_name,
514
527
  statement_params=statement_params
515
528
  )
@@ -555,7 +568,7 @@ class PCA(BaseTransformer):
555
568
 
556
569
  # Register vectorized UDF for batch inference
557
570
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
558
- safe_id=self.id, method=inference_method)
571
+ safe_id=self._get_rand_id(), method=inference_method)
559
572
 
560
573
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
561
574
  # will try to pickle all of self which fails.
@@ -647,7 +660,7 @@ class PCA(BaseTransformer):
647
660
  return transformed_pandas_df.to_dict("records")
648
661
 
649
662
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
650
- safe_id=self.id
663
+ safe_id=self._get_rand_id()
651
664
  )
652
665
 
653
666
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -812,11 +825,18 @@ class PCA(BaseTransformer):
812
825
  Transformed dataset.
813
826
  """
814
827
  if isinstance(dataset, DataFrame):
828
+ expected_type_inferred = ""
829
+ # when it is classifier, infer the datatype from label columns
830
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
831
+ expected_type_inferred = convert_sp_to_sf_type(
832
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
833
+ )
834
+
815
835
  output_df = self._batch_inference(
816
836
  dataset=dataset,
817
837
  inference_method="predict",
818
838
  expected_output_cols_list=self.output_cols,
819
- expected_output_cols_type="",
839
+ expected_output_cols_type=expected_type_inferred,
820
840
  )
821
841
  elif isinstance(dataset, pd.DataFrame):
822
842
  output_df = self._sklearn_inference(
@@ -889,10 +909,10 @@ class PCA(BaseTransformer):
889
909
 
890
910
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
891
911
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
892
- Returns an empty list if current object is not a classifier or not yet fitted.
912
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
893
913
  """
894
914
  if getattr(self._sklearn_object, "classes_", None) is None:
895
- return []
915
+ return [output_cols_prefix]
896
916
 
897
917
  classes = self._sklearn_object.classes_
898
918
  if isinstance(classes, numpy.ndarray):
@@ -1117,7 +1137,7 @@ class PCA(BaseTransformer):
1117
1137
  cp.dump(self._sklearn_object, local_score_file)
1118
1138
 
1119
1139
  # Create temp stage to run score.
1120
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1140
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1121
1141
  session = dataset._session
1122
1142
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1123
1143
  SqlResultValidator(
@@ -1131,8 +1151,9 @@ class PCA(BaseTransformer):
1131
1151
  expected_value=f"Stage area {score_stage_name} successfully created."
1132
1152
  ).validate()
1133
1153
 
1134
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1135
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1154
+ # Use posixpath to construct stage paths
1155
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1156
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1136
1157
  statement_params = telemetry.get_function_usage_statement_params(
1137
1158
  project=_PROJECT,
1138
1159
  subproject=_SUBPROJECT,
@@ -1158,6 +1179,7 @@ class PCA(BaseTransformer):
1158
1179
  replace=True,
1159
1180
  session=session,
1160
1181
  statement_params=statement_params,
1182
+ anonymous=True
1161
1183
  )
1162
1184
  def score_wrapper_sproc(
1163
1185
  session: Session,
@@ -1165,7 +1187,8 @@ class PCA(BaseTransformer):
1165
1187
  stage_score_file_name: str,
1166
1188
  input_cols: List[str],
1167
1189
  label_cols: List[str],
1168
- sample_weight_col: Optional[str]
1190
+ sample_weight_col: Optional[str],
1191
+ statement_params: Dict[str, str]
1169
1192
  ) -> float:
1170
1193
  import cloudpickle as cp
1171
1194
  import numpy as np
@@ -1215,14 +1238,14 @@ class PCA(BaseTransformer):
1215
1238
  api_calls=[Session.call],
1216
1239
  custom_tags=dict([("autogen", True)]),
1217
1240
  )
1218
- score = session.call(
1219
- score_sproc_name,
1241
+ score = score_wrapper_sproc(
1242
+ session,
1220
1243
  query,
1221
1244
  stage_score_file_name,
1222
1245
  identifier.get_unescaped_names(self.input_cols),
1223
1246
  identifier.get_unescaped_names(self.label_cols),
1224
1247
  identifier.get_unescaped_names(self.sample_weight_col),
1225
- statement_params=statement_params,
1248
+ statement_params,
1226
1249
  )
1227
1250
 
1228
1251
  cleanup_temp_files([local_score_file_name])
@@ -1240,18 +1263,20 @@ class PCA(BaseTransformer):
1240
1263
  if self._sklearn_object._estimator_type == 'classifier':
1241
1264
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1242
1265
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1243
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1266
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1267
+ ([] if self._drop_input_cols else inputs) + outputs)
1244
1268
  # For regressor, the type of predict is float64
1245
1269
  elif self._sklearn_object._estimator_type == 'regressor':
1246
1270
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1247
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1248
-
1271
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1272
+ ([] if self._drop_input_cols else inputs) + outputs)
1249
1273
  for prob_func in PROB_FUNCTIONS:
1250
1274
  if hasattr(self, prob_func):
1251
1275
  output_cols_prefix: str = f"{prob_func}_"
1252
1276
  output_column_names = self._get_output_column_names(output_cols_prefix)
1253
1277
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1254
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1278
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1279
+ ([] if self._drop_input_cols else inputs) + outputs)
1255
1280
 
1256
1281
  @property
1257
1282
  def model_signatures(self) -> Dict[str, ModelSignature]: