snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -238,7 +240,6 @@ class SparsePCA(BaseTransformer):
238
240
  sample_weight_col: Optional[str] = None,
239
241
  ) -> None:
240
242
  super().__init__()
241
- self.id = str(uuid4()).replace("-", "_").upper()
242
243
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
243
244
 
244
245
  self._deps = list(deps)
@@ -268,6 +269,15 @@ class SparsePCA(BaseTransformer):
268
269
  self.set_drop_input_cols(drop_input_cols)
269
270
  self.set_sample_weight_col(sample_weight_col)
270
271
 
272
+ def _get_rand_id(self) -> str:
273
+ """
274
+ Generate random id to be used in sproc and stage names.
275
+
276
+ Returns:
277
+ Random id string usable in sproc, table, and stage names.
278
+ """
279
+ return str(uuid4()).replace("-", "_").upper()
280
+
271
281
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
272
282
  """
273
283
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -346,7 +356,7 @@ class SparsePCA(BaseTransformer):
346
356
  cp.dump(self._sklearn_object, local_transform_file)
347
357
 
348
358
  # Create temp stage to run fit.
349
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
359
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
350
360
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
351
361
  SqlResultValidator(
352
362
  session=session,
@@ -359,11 +369,12 @@ class SparsePCA(BaseTransformer):
359
369
  expected_value=f"Stage area {transform_stage_name} successfully created."
360
370
  ).validate()
361
371
 
362
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
372
+ # Use posixpath to construct stage paths
373
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
374
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
363
375
  local_result_file_name = get_temp_file_path()
364
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
365
376
 
366
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
377
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
367
378
  statement_params = telemetry.get_function_usage_statement_params(
368
379
  project=_PROJECT,
369
380
  subproject=_SUBPROJECT,
@@ -389,6 +400,7 @@ class SparsePCA(BaseTransformer):
389
400
  replace=True,
390
401
  session=session,
391
402
  statement_params=statement_params,
403
+ anonymous=True
392
404
  )
393
405
  def fit_wrapper_sproc(
394
406
  session: Session,
@@ -397,7 +409,8 @@ class SparsePCA(BaseTransformer):
397
409
  stage_result_file_name: str,
398
410
  input_cols: List[str],
399
411
  label_cols: List[str],
400
- sample_weight_col: Optional[str]
412
+ sample_weight_col: Optional[str],
413
+ statement_params: Dict[str, str]
401
414
  ) -> str:
402
415
  import cloudpickle as cp
403
416
  import numpy as np
@@ -464,15 +477,15 @@ class SparsePCA(BaseTransformer):
464
477
  api_calls=[Session.call],
465
478
  custom_tags=dict([("autogen", True)]),
466
479
  )
467
- sproc_export_file_name = session.call(
468
- fit_sproc_name,
480
+ sproc_export_file_name = fit_wrapper_sproc(
481
+ session,
469
482
  query,
470
483
  stage_transform_file_name,
471
484
  stage_result_file_name,
472
485
  identifier.get_unescaped_names(self.input_cols),
473
486
  identifier.get_unescaped_names(self.label_cols),
474
487
  identifier.get_unescaped_names(self.sample_weight_col),
475
- statement_params=statement_params,
488
+ statement_params,
476
489
  )
477
490
 
478
491
  if "|" in sproc_export_file_name:
@@ -482,7 +495,7 @@ class SparsePCA(BaseTransformer):
482
495
  print("\n".join(fields[1:]))
483
496
 
484
497
  session.file.get(
485
- os.path.join(stage_result_file_name, sproc_export_file_name),
498
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
486
499
  local_result_file_name,
487
500
  statement_params=statement_params
488
501
  )
@@ -528,7 +541,7 @@ class SparsePCA(BaseTransformer):
528
541
 
529
542
  # Register vectorized UDF for batch inference
530
543
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
531
- safe_id=self.id, method=inference_method)
544
+ safe_id=self._get_rand_id(), method=inference_method)
532
545
 
533
546
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
534
547
  # will try to pickle all of self which fails.
@@ -620,7 +633,7 @@ class SparsePCA(BaseTransformer):
620
633
  return transformed_pandas_df.to_dict("records")
621
634
 
622
635
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
623
- safe_id=self.id
636
+ safe_id=self._get_rand_id()
624
637
  )
625
638
 
626
639
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -785,11 +798,18 @@ class SparsePCA(BaseTransformer):
785
798
  Transformed dataset.
786
799
  """
787
800
  if isinstance(dataset, DataFrame):
801
+ expected_type_inferred = ""
802
+ # when it is classifier, infer the datatype from label columns
803
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
804
+ expected_type_inferred = convert_sp_to_sf_type(
805
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
806
+ )
807
+
788
808
  output_df = self._batch_inference(
789
809
  dataset=dataset,
790
810
  inference_method="predict",
791
811
  expected_output_cols_list=self.output_cols,
792
- expected_output_cols_type="",
812
+ expected_output_cols_type=expected_type_inferred,
793
813
  )
794
814
  elif isinstance(dataset, pd.DataFrame):
795
815
  output_df = self._sklearn_inference(
@@ -862,10 +882,10 @@ class SparsePCA(BaseTransformer):
862
882
 
863
883
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
864
884
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
865
- Returns an empty list if current object is not a classifier or not yet fitted.
885
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
866
886
  """
867
887
  if getattr(self._sklearn_object, "classes_", None) is None:
868
- return []
888
+ return [output_cols_prefix]
869
889
 
870
890
  classes = self._sklearn_object.classes_
871
891
  if isinstance(classes, numpy.ndarray):
@@ -1090,7 +1110,7 @@ class SparsePCA(BaseTransformer):
1090
1110
  cp.dump(self._sklearn_object, local_score_file)
1091
1111
 
1092
1112
  # Create temp stage to run score.
1093
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1113
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1094
1114
  session = dataset._session
1095
1115
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1096
1116
  SqlResultValidator(
@@ -1104,8 +1124,9 @@ class SparsePCA(BaseTransformer):
1104
1124
  expected_value=f"Stage area {score_stage_name} successfully created."
1105
1125
  ).validate()
1106
1126
 
1107
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1108
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1127
+ # Use posixpath to construct stage paths
1128
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1129
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1109
1130
  statement_params = telemetry.get_function_usage_statement_params(
1110
1131
  project=_PROJECT,
1111
1132
  subproject=_SUBPROJECT,
@@ -1131,6 +1152,7 @@ class SparsePCA(BaseTransformer):
1131
1152
  replace=True,
1132
1153
  session=session,
1133
1154
  statement_params=statement_params,
1155
+ anonymous=True
1134
1156
  )
1135
1157
  def score_wrapper_sproc(
1136
1158
  session: Session,
@@ -1138,7 +1160,8 @@ class SparsePCA(BaseTransformer):
1138
1160
  stage_score_file_name: str,
1139
1161
  input_cols: List[str],
1140
1162
  label_cols: List[str],
1141
- sample_weight_col: Optional[str]
1163
+ sample_weight_col: Optional[str],
1164
+ statement_params: Dict[str, str]
1142
1165
  ) -> float:
1143
1166
  import cloudpickle as cp
1144
1167
  import numpy as np
@@ -1188,14 +1211,14 @@ class SparsePCA(BaseTransformer):
1188
1211
  api_calls=[Session.call],
1189
1212
  custom_tags=dict([("autogen", True)]),
1190
1213
  )
1191
- score = session.call(
1192
- score_sproc_name,
1214
+ score = score_wrapper_sproc(
1215
+ session,
1193
1216
  query,
1194
1217
  stage_score_file_name,
1195
1218
  identifier.get_unescaped_names(self.input_cols),
1196
1219
  identifier.get_unescaped_names(self.label_cols),
1197
1220
  identifier.get_unescaped_names(self.sample_weight_col),
1198
- statement_params=statement_params,
1221
+ statement_params,
1199
1222
  )
1200
1223
 
1201
1224
  cleanup_temp_files([local_score_file_name])
@@ -1213,18 +1236,20 @@ class SparsePCA(BaseTransformer):
1213
1236
  if self._sklearn_object._estimator_type == 'classifier':
1214
1237
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1215
1238
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1216
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1239
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1240
+ ([] if self._drop_input_cols else inputs) + outputs)
1217
1241
  # For regressor, the type of predict is float64
1218
1242
  elif self._sklearn_object._estimator_type == 'regressor':
1219
1243
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1220
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1221
-
1244
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1245
+ ([] if self._drop_input_cols else inputs) + outputs)
1222
1246
  for prob_func in PROB_FUNCTIONS:
1223
1247
  if hasattr(self, prob_func):
1224
1248
  output_cols_prefix: str = f"{prob_func}_"
1225
1249
  output_column_names = self._get_output_column_names(output_cols_prefix)
1226
1250
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1227
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1251
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1252
+ ([] if self._drop_input_cols else inputs) + outputs)
1228
1253
 
1229
1254
  @property
1230
1255
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -223,7 +225,6 @@ class TruncatedSVD(BaseTransformer):
223
225
  sample_weight_col: Optional[str] = None,
224
226
  ) -> None:
225
227
  super().__init__()
226
- self.id = str(uuid4()).replace("-", "_").upper()
227
228
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
228
229
 
229
230
  self._deps = list(deps)
@@ -249,6 +250,15 @@ class TruncatedSVD(BaseTransformer):
249
250
  self.set_drop_input_cols(drop_input_cols)
250
251
  self.set_sample_weight_col(sample_weight_col)
251
252
 
253
+ def _get_rand_id(self) -> str:
254
+ """
255
+ Generate random id to be used in sproc and stage names.
256
+
257
+ Returns:
258
+ Random id string usable in sproc, table, and stage names.
259
+ """
260
+ return str(uuid4()).replace("-", "_").upper()
261
+
252
262
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
253
263
  """
254
264
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -327,7 +337,7 @@ class TruncatedSVD(BaseTransformer):
327
337
  cp.dump(self._sklearn_object, local_transform_file)
328
338
 
329
339
  # Create temp stage to run fit.
330
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
340
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
331
341
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
332
342
  SqlResultValidator(
333
343
  session=session,
@@ -340,11 +350,12 @@ class TruncatedSVD(BaseTransformer):
340
350
  expected_value=f"Stage area {transform_stage_name} successfully created."
341
351
  ).validate()
342
352
 
343
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
353
+ # Use posixpath to construct stage paths
354
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
355
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
344
356
  local_result_file_name = get_temp_file_path()
345
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
346
357
 
347
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
358
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
348
359
  statement_params = telemetry.get_function_usage_statement_params(
349
360
  project=_PROJECT,
350
361
  subproject=_SUBPROJECT,
@@ -370,6 +381,7 @@ class TruncatedSVD(BaseTransformer):
370
381
  replace=True,
371
382
  session=session,
372
383
  statement_params=statement_params,
384
+ anonymous=True
373
385
  )
374
386
  def fit_wrapper_sproc(
375
387
  session: Session,
@@ -378,7 +390,8 @@ class TruncatedSVD(BaseTransformer):
378
390
  stage_result_file_name: str,
379
391
  input_cols: List[str],
380
392
  label_cols: List[str],
381
- sample_weight_col: Optional[str]
393
+ sample_weight_col: Optional[str],
394
+ statement_params: Dict[str, str]
382
395
  ) -> str:
383
396
  import cloudpickle as cp
384
397
  import numpy as np
@@ -445,15 +458,15 @@ class TruncatedSVD(BaseTransformer):
445
458
  api_calls=[Session.call],
446
459
  custom_tags=dict([("autogen", True)]),
447
460
  )
448
- sproc_export_file_name = session.call(
449
- fit_sproc_name,
461
+ sproc_export_file_name = fit_wrapper_sproc(
462
+ session,
450
463
  query,
451
464
  stage_transform_file_name,
452
465
  stage_result_file_name,
453
466
  identifier.get_unescaped_names(self.input_cols),
454
467
  identifier.get_unescaped_names(self.label_cols),
455
468
  identifier.get_unescaped_names(self.sample_weight_col),
456
- statement_params=statement_params,
469
+ statement_params,
457
470
  )
458
471
 
459
472
  if "|" in sproc_export_file_name:
@@ -463,7 +476,7 @@ class TruncatedSVD(BaseTransformer):
463
476
  print("\n".join(fields[1:]))
464
477
 
465
478
  session.file.get(
466
- os.path.join(stage_result_file_name, sproc_export_file_name),
479
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
467
480
  local_result_file_name,
468
481
  statement_params=statement_params
469
482
  )
@@ -509,7 +522,7 @@ class TruncatedSVD(BaseTransformer):
509
522
 
510
523
  # Register vectorized UDF for batch inference
511
524
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
512
- safe_id=self.id, method=inference_method)
525
+ safe_id=self._get_rand_id(), method=inference_method)
513
526
 
514
527
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
515
528
  # will try to pickle all of self which fails.
@@ -601,7 +614,7 @@ class TruncatedSVD(BaseTransformer):
601
614
  return transformed_pandas_df.to_dict("records")
602
615
 
603
616
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
604
- safe_id=self.id
617
+ safe_id=self._get_rand_id()
605
618
  )
606
619
 
607
620
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -766,11 +779,18 @@ class TruncatedSVD(BaseTransformer):
766
779
  Transformed dataset.
767
780
  """
768
781
  if isinstance(dataset, DataFrame):
782
+ expected_type_inferred = ""
783
+ # when it is classifier, infer the datatype from label columns
784
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
785
+ expected_type_inferred = convert_sp_to_sf_type(
786
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
787
+ )
788
+
769
789
  output_df = self._batch_inference(
770
790
  dataset=dataset,
771
791
  inference_method="predict",
772
792
  expected_output_cols_list=self.output_cols,
773
- expected_output_cols_type="",
793
+ expected_output_cols_type=expected_type_inferred,
774
794
  )
775
795
  elif isinstance(dataset, pd.DataFrame):
776
796
  output_df = self._sklearn_inference(
@@ -843,10 +863,10 @@ class TruncatedSVD(BaseTransformer):
843
863
 
844
864
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
845
865
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
846
- Returns an empty list if current object is not a classifier or not yet fitted.
866
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
847
867
  """
848
868
  if getattr(self._sklearn_object, "classes_", None) is None:
849
- return []
869
+ return [output_cols_prefix]
850
870
 
851
871
  classes = self._sklearn_object.classes_
852
872
  if isinstance(classes, numpy.ndarray):
@@ -1071,7 +1091,7 @@ class TruncatedSVD(BaseTransformer):
1071
1091
  cp.dump(self._sklearn_object, local_score_file)
1072
1092
 
1073
1093
  # Create temp stage to run score.
1074
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1094
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1075
1095
  session = dataset._session
1076
1096
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1077
1097
  SqlResultValidator(
@@ -1085,8 +1105,9 @@ class TruncatedSVD(BaseTransformer):
1085
1105
  expected_value=f"Stage area {score_stage_name} successfully created."
1086
1106
  ).validate()
1087
1107
 
1088
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1089
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1108
+ # Use posixpath to construct stage paths
1109
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1110
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1090
1111
  statement_params = telemetry.get_function_usage_statement_params(
1091
1112
  project=_PROJECT,
1092
1113
  subproject=_SUBPROJECT,
@@ -1112,6 +1133,7 @@ class TruncatedSVD(BaseTransformer):
1112
1133
  replace=True,
1113
1134
  session=session,
1114
1135
  statement_params=statement_params,
1136
+ anonymous=True
1115
1137
  )
1116
1138
  def score_wrapper_sproc(
1117
1139
  session: Session,
@@ -1119,7 +1141,8 @@ class TruncatedSVD(BaseTransformer):
1119
1141
  stage_score_file_name: str,
1120
1142
  input_cols: List[str],
1121
1143
  label_cols: List[str],
1122
- sample_weight_col: Optional[str]
1144
+ sample_weight_col: Optional[str],
1145
+ statement_params: Dict[str, str]
1123
1146
  ) -> float:
1124
1147
  import cloudpickle as cp
1125
1148
  import numpy as np
@@ -1169,14 +1192,14 @@ class TruncatedSVD(BaseTransformer):
1169
1192
  api_calls=[Session.call],
1170
1193
  custom_tags=dict([("autogen", True)]),
1171
1194
  )
1172
- score = session.call(
1173
- score_sproc_name,
1195
+ score = score_wrapper_sproc(
1196
+ session,
1174
1197
  query,
1175
1198
  stage_score_file_name,
1176
1199
  identifier.get_unescaped_names(self.input_cols),
1177
1200
  identifier.get_unescaped_names(self.label_cols),
1178
1201
  identifier.get_unescaped_names(self.sample_weight_col),
1179
- statement_params=statement_params,
1202
+ statement_params,
1180
1203
  )
1181
1204
 
1182
1205
  cleanup_temp_files([local_score_file_name])
@@ -1194,18 +1217,20 @@ class TruncatedSVD(BaseTransformer):
1194
1217
  if self._sklearn_object._estimator_type == 'classifier':
1195
1218
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1196
1219
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1197
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1220
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1221
+ ([] if self._drop_input_cols else inputs) + outputs)
1198
1222
  # For regressor, the type of predict is float64
1199
1223
  elif self._sklearn_object._estimator_type == 'regressor':
1200
1224
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1201
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1202
-
1225
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1226
+ ([] if self._drop_input_cols else inputs) + outputs)
1203
1227
  for prob_func in PROB_FUNCTIONS:
1204
1228
  if hasattr(self, prob_func):
1205
1229
  output_cols_prefix: str = f"{prob_func}_"
1206
1230
  output_column_names = self._get_output_column_names(output_cols_prefix)
1207
1231
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1208
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1232
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1233
+ ([] if self._drop_input_cols else inputs) + outputs)
1209
1234
 
1210
1235
  @property
1211
1236
  def model_signatures(self) -> Dict[str, ModelSignature]: