snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -245,7 +247,6 @@ class SpectralEmbedding(BaseTransformer):
245
247
  sample_weight_col: Optional[str] = None,
246
248
  ) -> None:
247
249
  super().__init__()
248
- self.id = str(uuid4()).replace("-", "_").upper()
249
250
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
250
251
 
251
252
  self._deps = list(deps)
@@ -272,6 +273,15 @@ class SpectralEmbedding(BaseTransformer):
272
273
  self.set_drop_input_cols(drop_input_cols)
273
274
  self.set_sample_weight_col(sample_weight_col)
274
275
 
276
+ def _get_rand_id(self) -> str:
277
+ """
278
+ Generate random id to be used in sproc and stage names.
279
+
280
+ Returns:
281
+ Random id string usable in sproc, table, and stage names.
282
+ """
283
+ return str(uuid4()).replace("-", "_").upper()
284
+
275
285
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
276
286
  """
277
287
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -350,7 +360,7 @@ class SpectralEmbedding(BaseTransformer):
350
360
  cp.dump(self._sklearn_object, local_transform_file)
351
361
 
352
362
  # Create temp stage to run fit.
353
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
363
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
354
364
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
355
365
  SqlResultValidator(
356
366
  session=session,
@@ -363,11 +373,12 @@ class SpectralEmbedding(BaseTransformer):
363
373
  expected_value=f"Stage area {transform_stage_name} successfully created."
364
374
  ).validate()
365
375
 
366
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
376
+ # Use posixpath to construct stage paths
377
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
378
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
367
379
  local_result_file_name = get_temp_file_path()
368
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
369
380
 
370
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
381
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
371
382
  statement_params = telemetry.get_function_usage_statement_params(
372
383
  project=_PROJECT,
373
384
  subproject=_SUBPROJECT,
@@ -393,6 +404,7 @@ class SpectralEmbedding(BaseTransformer):
393
404
  replace=True,
394
405
  session=session,
395
406
  statement_params=statement_params,
407
+ anonymous=True
396
408
  )
397
409
  def fit_wrapper_sproc(
398
410
  session: Session,
@@ -401,7 +413,8 @@ class SpectralEmbedding(BaseTransformer):
401
413
  stage_result_file_name: str,
402
414
  input_cols: List[str],
403
415
  label_cols: List[str],
404
- sample_weight_col: Optional[str]
416
+ sample_weight_col: Optional[str],
417
+ statement_params: Dict[str, str]
405
418
  ) -> str:
406
419
  import cloudpickle as cp
407
420
  import numpy as np
@@ -468,15 +481,15 @@ class SpectralEmbedding(BaseTransformer):
468
481
  api_calls=[Session.call],
469
482
  custom_tags=dict([("autogen", True)]),
470
483
  )
471
- sproc_export_file_name = session.call(
472
- fit_sproc_name,
484
+ sproc_export_file_name = fit_wrapper_sproc(
485
+ session,
473
486
  query,
474
487
  stage_transform_file_name,
475
488
  stage_result_file_name,
476
489
  identifier.get_unescaped_names(self.input_cols),
477
490
  identifier.get_unescaped_names(self.label_cols),
478
491
  identifier.get_unescaped_names(self.sample_weight_col),
479
- statement_params=statement_params,
492
+ statement_params,
480
493
  )
481
494
 
482
495
  if "|" in sproc_export_file_name:
@@ -486,7 +499,7 @@ class SpectralEmbedding(BaseTransformer):
486
499
  print("\n".join(fields[1:]))
487
500
 
488
501
  session.file.get(
489
- os.path.join(stage_result_file_name, sproc_export_file_name),
502
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
490
503
  local_result_file_name,
491
504
  statement_params=statement_params
492
505
  )
@@ -532,7 +545,7 @@ class SpectralEmbedding(BaseTransformer):
532
545
 
533
546
  # Register vectorized UDF for batch inference
534
547
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
535
- safe_id=self.id, method=inference_method)
548
+ safe_id=self._get_rand_id(), method=inference_method)
536
549
 
537
550
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
538
551
  # will try to pickle all of self which fails.
@@ -624,7 +637,7 @@ class SpectralEmbedding(BaseTransformer):
624
637
  return transformed_pandas_df.to_dict("records")
625
638
 
626
639
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
627
- safe_id=self.id
640
+ safe_id=self._get_rand_id()
628
641
  )
629
642
 
630
643
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -789,11 +802,18 @@ class SpectralEmbedding(BaseTransformer):
789
802
  Transformed dataset.
790
803
  """
791
804
  if isinstance(dataset, DataFrame):
805
+ expected_type_inferred = ""
806
+ # when it is classifier, infer the datatype from label columns
807
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
808
+ expected_type_inferred = convert_sp_to_sf_type(
809
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
810
+ )
811
+
792
812
  output_df = self._batch_inference(
793
813
  dataset=dataset,
794
814
  inference_method="predict",
795
815
  expected_output_cols_list=self.output_cols,
796
- expected_output_cols_type="",
816
+ expected_output_cols_type=expected_type_inferred,
797
817
  )
798
818
  elif isinstance(dataset, pd.DataFrame):
799
819
  output_df = self._sklearn_inference(
@@ -864,10 +884,10 @@ class SpectralEmbedding(BaseTransformer):
864
884
 
865
885
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
866
886
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
867
- Returns an empty list if current object is not a classifier or not yet fitted.
887
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
868
888
  """
869
889
  if getattr(self._sklearn_object, "classes_", None) is None:
870
- return []
890
+ return [output_cols_prefix]
871
891
 
872
892
  classes = self._sklearn_object.classes_
873
893
  if isinstance(classes, numpy.ndarray):
@@ -1092,7 +1112,7 @@ class SpectralEmbedding(BaseTransformer):
1092
1112
  cp.dump(self._sklearn_object, local_score_file)
1093
1113
 
1094
1114
  # Create temp stage to run score.
1095
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1115
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1096
1116
  session = dataset._session
1097
1117
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1098
1118
  SqlResultValidator(
@@ -1106,8 +1126,9 @@ class SpectralEmbedding(BaseTransformer):
1106
1126
  expected_value=f"Stage area {score_stage_name} successfully created."
1107
1127
  ).validate()
1108
1128
 
1109
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1110
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1129
+ # Use posixpath to construct stage paths
1130
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1131
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1111
1132
  statement_params = telemetry.get_function_usage_statement_params(
1112
1133
  project=_PROJECT,
1113
1134
  subproject=_SUBPROJECT,
@@ -1133,6 +1154,7 @@ class SpectralEmbedding(BaseTransformer):
1133
1154
  replace=True,
1134
1155
  session=session,
1135
1156
  statement_params=statement_params,
1157
+ anonymous=True
1136
1158
  )
1137
1159
  def score_wrapper_sproc(
1138
1160
  session: Session,
@@ -1140,7 +1162,8 @@ class SpectralEmbedding(BaseTransformer):
1140
1162
  stage_score_file_name: str,
1141
1163
  input_cols: List[str],
1142
1164
  label_cols: List[str],
1143
- sample_weight_col: Optional[str]
1165
+ sample_weight_col: Optional[str],
1166
+ statement_params: Dict[str, str]
1144
1167
  ) -> float:
1145
1168
  import cloudpickle as cp
1146
1169
  import numpy as np
@@ -1190,14 +1213,14 @@ class SpectralEmbedding(BaseTransformer):
1190
1213
  api_calls=[Session.call],
1191
1214
  custom_tags=dict([("autogen", True)]),
1192
1215
  )
1193
- score = session.call(
1194
- score_sproc_name,
1216
+ score = score_wrapper_sproc(
1217
+ session,
1195
1218
  query,
1196
1219
  stage_score_file_name,
1197
1220
  identifier.get_unescaped_names(self.input_cols),
1198
1221
  identifier.get_unescaped_names(self.label_cols),
1199
1222
  identifier.get_unescaped_names(self.sample_weight_col),
1200
- statement_params=statement_params,
1223
+ statement_params,
1201
1224
  )
1202
1225
 
1203
1226
  cleanup_temp_files([local_score_file_name])
@@ -1215,18 +1238,20 @@ class SpectralEmbedding(BaseTransformer):
1215
1238
  if self._sklearn_object._estimator_type == 'classifier':
1216
1239
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1217
1240
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1218
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1241
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1242
+ ([] if self._drop_input_cols else inputs) + outputs)
1219
1243
  # For regressor, the type of predict is float64
1220
1244
  elif self._sklearn_object._estimator_type == 'regressor':
1221
1245
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1222
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1223
-
1246
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1247
+ ([] if self._drop_input_cols else inputs) + outputs)
1224
1248
  for prob_func in PROB_FUNCTIONS:
1225
1249
  if hasattr(self, prob_func):
1226
1250
  output_cols_prefix: str = f"{prob_func}_"
1227
1251
  output_column_names = self._get_output_column_names(output_cols_prefix)
1228
1252
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1229
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1253
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1254
+ ([] if self._drop_input_cols else inputs) + outputs)
1230
1255
 
1231
1256
  @property
1232
1257
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -302,7 +304,6 @@ class TSNE(BaseTransformer):
302
304
  sample_weight_col: Optional[str] = None,
303
305
  ) -> None:
304
306
  super().__init__()
305
- self.id = str(uuid4()).replace("-", "_").upper()
306
307
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
307
308
 
308
309
  self._deps = list(deps)
@@ -337,6 +338,15 @@ class TSNE(BaseTransformer):
337
338
  self.set_drop_input_cols(drop_input_cols)
338
339
  self.set_sample_weight_col(sample_weight_col)
339
340
 
341
+ def _get_rand_id(self) -> str:
342
+ """
343
+ Generate random id to be used in sproc and stage names.
344
+
345
+ Returns:
346
+ Random id string usable in sproc, table, and stage names.
347
+ """
348
+ return str(uuid4()).replace("-", "_").upper()
349
+
340
350
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
341
351
  """
342
352
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -415,7 +425,7 @@ class TSNE(BaseTransformer):
415
425
  cp.dump(self._sklearn_object, local_transform_file)
416
426
 
417
427
  # Create temp stage to run fit.
418
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
428
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
419
429
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
420
430
  SqlResultValidator(
421
431
  session=session,
@@ -428,11 +438,12 @@ class TSNE(BaseTransformer):
428
438
  expected_value=f"Stage area {transform_stage_name} successfully created."
429
439
  ).validate()
430
440
 
431
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
441
+ # Use posixpath to construct stage paths
442
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
443
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
432
444
  local_result_file_name = get_temp_file_path()
433
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
434
445
 
435
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
446
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
436
447
  statement_params = telemetry.get_function_usage_statement_params(
437
448
  project=_PROJECT,
438
449
  subproject=_SUBPROJECT,
@@ -458,6 +469,7 @@ class TSNE(BaseTransformer):
458
469
  replace=True,
459
470
  session=session,
460
471
  statement_params=statement_params,
472
+ anonymous=True
461
473
  )
462
474
  def fit_wrapper_sproc(
463
475
  session: Session,
@@ -466,7 +478,8 @@ class TSNE(BaseTransformer):
466
478
  stage_result_file_name: str,
467
479
  input_cols: List[str],
468
480
  label_cols: List[str],
469
- sample_weight_col: Optional[str]
481
+ sample_weight_col: Optional[str],
482
+ statement_params: Dict[str, str]
470
483
  ) -> str:
471
484
  import cloudpickle as cp
472
485
  import numpy as np
@@ -533,15 +546,15 @@ class TSNE(BaseTransformer):
533
546
  api_calls=[Session.call],
534
547
  custom_tags=dict([("autogen", True)]),
535
548
  )
536
- sproc_export_file_name = session.call(
537
- fit_sproc_name,
549
+ sproc_export_file_name = fit_wrapper_sproc(
550
+ session,
538
551
  query,
539
552
  stage_transform_file_name,
540
553
  stage_result_file_name,
541
554
  identifier.get_unescaped_names(self.input_cols),
542
555
  identifier.get_unescaped_names(self.label_cols),
543
556
  identifier.get_unescaped_names(self.sample_weight_col),
544
- statement_params=statement_params,
557
+ statement_params,
545
558
  )
546
559
 
547
560
  if "|" in sproc_export_file_name:
@@ -551,7 +564,7 @@ class TSNE(BaseTransformer):
551
564
  print("\n".join(fields[1:]))
552
565
 
553
566
  session.file.get(
554
- os.path.join(stage_result_file_name, sproc_export_file_name),
567
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
555
568
  local_result_file_name,
556
569
  statement_params=statement_params
557
570
  )
@@ -597,7 +610,7 @@ class TSNE(BaseTransformer):
597
610
 
598
611
  # Register vectorized UDF for batch inference
599
612
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
600
- safe_id=self.id, method=inference_method)
613
+ safe_id=self._get_rand_id(), method=inference_method)
601
614
 
602
615
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
603
616
  # will try to pickle all of self which fails.
@@ -689,7 +702,7 @@ class TSNE(BaseTransformer):
689
702
  return transformed_pandas_df.to_dict("records")
690
703
 
691
704
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
692
- safe_id=self.id
705
+ safe_id=self._get_rand_id()
693
706
  )
694
707
 
695
708
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -854,11 +867,18 @@ class TSNE(BaseTransformer):
854
867
  Transformed dataset.
855
868
  """
856
869
  if isinstance(dataset, DataFrame):
870
+ expected_type_inferred = ""
871
+ # when it is classifier, infer the datatype from label columns
872
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
873
+ expected_type_inferred = convert_sp_to_sf_type(
874
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
875
+ )
876
+
857
877
  output_df = self._batch_inference(
858
878
  dataset=dataset,
859
879
  inference_method="predict",
860
880
  expected_output_cols_list=self.output_cols,
861
- expected_output_cols_type="",
881
+ expected_output_cols_type=expected_type_inferred,
862
882
  )
863
883
  elif isinstance(dataset, pd.DataFrame):
864
884
  output_df = self._sklearn_inference(
@@ -929,10 +949,10 @@ class TSNE(BaseTransformer):
929
949
 
930
950
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
931
951
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
932
- Returns an empty list if current object is not a classifier or not yet fitted.
952
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
933
953
  """
934
954
  if getattr(self._sklearn_object, "classes_", None) is None:
935
- return []
955
+ return [output_cols_prefix]
936
956
 
937
957
  classes = self._sklearn_object.classes_
938
958
  if isinstance(classes, numpy.ndarray):
@@ -1157,7 +1177,7 @@ class TSNE(BaseTransformer):
1157
1177
  cp.dump(self._sklearn_object, local_score_file)
1158
1178
 
1159
1179
  # Create temp stage to run score.
1160
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1180
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1161
1181
  session = dataset._session
1162
1182
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1163
1183
  SqlResultValidator(
@@ -1171,8 +1191,9 @@ class TSNE(BaseTransformer):
1171
1191
  expected_value=f"Stage area {score_stage_name} successfully created."
1172
1192
  ).validate()
1173
1193
 
1174
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1175
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1194
+ # Use posixpath to construct stage paths
1195
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1196
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1176
1197
  statement_params = telemetry.get_function_usage_statement_params(
1177
1198
  project=_PROJECT,
1178
1199
  subproject=_SUBPROJECT,
@@ -1198,6 +1219,7 @@ class TSNE(BaseTransformer):
1198
1219
  replace=True,
1199
1220
  session=session,
1200
1221
  statement_params=statement_params,
1222
+ anonymous=True
1201
1223
  )
1202
1224
  def score_wrapper_sproc(
1203
1225
  session: Session,
@@ -1205,7 +1227,8 @@ class TSNE(BaseTransformer):
1205
1227
  stage_score_file_name: str,
1206
1228
  input_cols: List[str],
1207
1229
  label_cols: List[str],
1208
- sample_weight_col: Optional[str]
1230
+ sample_weight_col: Optional[str],
1231
+ statement_params: Dict[str, str]
1209
1232
  ) -> float:
1210
1233
  import cloudpickle as cp
1211
1234
  import numpy as np
@@ -1255,14 +1278,14 @@ class TSNE(BaseTransformer):
1255
1278
  api_calls=[Session.call],
1256
1279
  custom_tags=dict([("autogen", True)]),
1257
1280
  )
1258
- score = session.call(
1259
- score_sproc_name,
1281
+ score = score_wrapper_sproc(
1282
+ session,
1260
1283
  query,
1261
1284
  stage_score_file_name,
1262
1285
  identifier.get_unescaped_names(self.input_cols),
1263
1286
  identifier.get_unescaped_names(self.label_cols),
1264
1287
  identifier.get_unescaped_names(self.sample_weight_col),
1265
- statement_params=statement_params,
1288
+ statement_params,
1266
1289
  )
1267
1290
 
1268
1291
  cleanup_temp_files([local_score_file_name])
@@ -1280,18 +1303,20 @@ class TSNE(BaseTransformer):
1280
1303
  if self._sklearn_object._estimator_type == 'classifier':
1281
1304
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1282
1305
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1283
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1306
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1307
+ ([] if self._drop_input_cols else inputs) + outputs)
1284
1308
  # For regressor, the type of predict is float64
1285
1309
  elif self._sklearn_object._estimator_type == 'regressor':
1286
1310
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1287
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1288
-
1311
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1312
+ ([] if self._drop_input_cols else inputs) + outputs)
1289
1313
  for prob_func in PROB_FUNCTIONS:
1290
1314
  if hasattr(self, prob_func):
1291
1315
  output_cols_prefix: str = f"{prob_func}_"
1292
1316
  output_column_names = self._get_output_column_names(output_cols_prefix)
1293
1317
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1294
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1318
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1319
+ ([] if self._drop_input_cols else inputs) + outputs)
1295
1320
 
1296
1321
  @property
1297
1322
  def model_signatures(self) -> Dict[str, ModelSignature]: