snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -294,7 +296,6 @@ class SpectralClustering(BaseTransformer):
294
296
  sample_weight_col: Optional[str] = None,
295
297
  ) -> None:
296
298
  super().__init__()
297
- self.id = str(uuid4()).replace("-", "_").upper()
298
299
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
299
300
 
300
301
  self._deps = list(deps)
@@ -328,6 +329,15 @@ class SpectralClustering(BaseTransformer):
328
329
  self.set_drop_input_cols(drop_input_cols)
329
330
  self.set_sample_weight_col(sample_weight_col)
330
331
 
332
+ def _get_rand_id(self) -> str:
333
+ """
334
+ Generate random id to be used in sproc and stage names.
335
+
336
+ Returns:
337
+ Random id string usable in sproc, table, and stage names.
338
+ """
339
+ return str(uuid4()).replace("-", "_").upper()
340
+
331
341
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
332
342
  """
333
343
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -406,7 +416,7 @@ class SpectralClustering(BaseTransformer):
406
416
  cp.dump(self._sklearn_object, local_transform_file)
407
417
 
408
418
  # Create temp stage to run fit.
409
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
419
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
410
420
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
411
421
  SqlResultValidator(
412
422
  session=session,
@@ -419,11 +429,12 @@ class SpectralClustering(BaseTransformer):
419
429
  expected_value=f"Stage area {transform_stage_name} successfully created."
420
430
  ).validate()
421
431
 
422
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
432
+ # Use posixpath to construct stage paths
433
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
434
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
423
435
  local_result_file_name = get_temp_file_path()
424
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
425
436
 
426
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
437
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
427
438
  statement_params = telemetry.get_function_usage_statement_params(
428
439
  project=_PROJECT,
429
440
  subproject=_SUBPROJECT,
@@ -449,6 +460,7 @@ class SpectralClustering(BaseTransformer):
449
460
  replace=True,
450
461
  session=session,
451
462
  statement_params=statement_params,
463
+ anonymous=True
452
464
  )
453
465
  def fit_wrapper_sproc(
454
466
  session: Session,
@@ -457,7 +469,8 @@ class SpectralClustering(BaseTransformer):
457
469
  stage_result_file_name: str,
458
470
  input_cols: List[str],
459
471
  label_cols: List[str],
460
- sample_weight_col: Optional[str]
472
+ sample_weight_col: Optional[str],
473
+ statement_params: Dict[str, str]
461
474
  ) -> str:
462
475
  import cloudpickle as cp
463
476
  import numpy as np
@@ -524,15 +537,15 @@ class SpectralClustering(BaseTransformer):
524
537
  api_calls=[Session.call],
525
538
  custom_tags=dict([("autogen", True)]),
526
539
  )
527
- sproc_export_file_name = session.call(
528
- fit_sproc_name,
540
+ sproc_export_file_name = fit_wrapper_sproc(
541
+ session,
529
542
  query,
530
543
  stage_transform_file_name,
531
544
  stage_result_file_name,
532
545
  identifier.get_unescaped_names(self.input_cols),
533
546
  identifier.get_unescaped_names(self.label_cols),
534
547
  identifier.get_unescaped_names(self.sample_weight_col),
535
- statement_params=statement_params,
548
+ statement_params,
536
549
  )
537
550
 
538
551
  if "|" in sproc_export_file_name:
@@ -542,7 +555,7 @@ class SpectralClustering(BaseTransformer):
542
555
  print("\n".join(fields[1:]))
543
556
 
544
557
  session.file.get(
545
- os.path.join(stage_result_file_name, sproc_export_file_name),
558
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
546
559
  local_result_file_name,
547
560
  statement_params=statement_params
548
561
  )
@@ -588,7 +601,7 @@ class SpectralClustering(BaseTransformer):
588
601
 
589
602
  # Register vectorized UDF for batch inference
590
603
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
591
- safe_id=self.id, method=inference_method)
604
+ safe_id=self._get_rand_id(), method=inference_method)
592
605
 
593
606
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
594
607
  # will try to pickle all of self which fails.
@@ -680,7 +693,7 @@ class SpectralClustering(BaseTransformer):
680
693
  return transformed_pandas_df.to_dict("records")
681
694
 
682
695
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
683
- safe_id=self.id
696
+ safe_id=self._get_rand_id()
684
697
  )
685
698
 
686
699
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -845,11 +858,18 @@ class SpectralClustering(BaseTransformer):
845
858
  Transformed dataset.
846
859
  """
847
860
  if isinstance(dataset, DataFrame):
861
+ expected_type_inferred = ""
862
+ # when it is classifier, infer the datatype from label columns
863
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
864
+ expected_type_inferred = convert_sp_to_sf_type(
865
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
866
+ )
867
+
848
868
  output_df = self._batch_inference(
849
869
  dataset=dataset,
850
870
  inference_method="predict",
851
871
  expected_output_cols_list=self.output_cols,
852
- expected_output_cols_type="",
872
+ expected_output_cols_type=expected_type_inferred,
853
873
  )
854
874
  elif isinstance(dataset, pd.DataFrame):
855
875
  output_df = self._sklearn_inference(
@@ -920,10 +940,10 @@ class SpectralClustering(BaseTransformer):
920
940
 
921
941
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
922
942
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
923
- Returns an empty list if current object is not a classifier or not yet fitted.
943
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
924
944
  """
925
945
  if getattr(self._sklearn_object, "classes_", None) is None:
926
- return []
946
+ return [output_cols_prefix]
927
947
 
928
948
  classes = self._sklearn_object.classes_
929
949
  if isinstance(classes, numpy.ndarray):
@@ -1148,7 +1168,7 @@ class SpectralClustering(BaseTransformer):
1148
1168
  cp.dump(self._sklearn_object, local_score_file)
1149
1169
 
1150
1170
  # Create temp stage to run score.
1151
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1171
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1152
1172
  session = dataset._session
1153
1173
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1154
1174
  SqlResultValidator(
@@ -1162,8 +1182,9 @@ class SpectralClustering(BaseTransformer):
1162
1182
  expected_value=f"Stage area {score_stage_name} successfully created."
1163
1183
  ).validate()
1164
1184
 
1165
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1166
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1185
+ # Use posixpath to construct stage paths
1186
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1187
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1167
1188
  statement_params = telemetry.get_function_usage_statement_params(
1168
1189
  project=_PROJECT,
1169
1190
  subproject=_SUBPROJECT,
@@ -1189,6 +1210,7 @@ class SpectralClustering(BaseTransformer):
1189
1210
  replace=True,
1190
1211
  session=session,
1191
1212
  statement_params=statement_params,
1213
+ anonymous=True
1192
1214
  )
1193
1215
  def score_wrapper_sproc(
1194
1216
  session: Session,
@@ -1196,7 +1218,8 @@ class SpectralClustering(BaseTransformer):
1196
1218
  stage_score_file_name: str,
1197
1219
  input_cols: List[str],
1198
1220
  label_cols: List[str],
1199
- sample_weight_col: Optional[str]
1221
+ sample_weight_col: Optional[str],
1222
+ statement_params: Dict[str, str]
1200
1223
  ) -> float:
1201
1224
  import cloudpickle as cp
1202
1225
  import numpy as np
@@ -1246,14 +1269,14 @@ class SpectralClustering(BaseTransformer):
1246
1269
  api_calls=[Session.call],
1247
1270
  custom_tags=dict([("autogen", True)]),
1248
1271
  )
1249
- score = session.call(
1250
- score_sproc_name,
1272
+ score = score_wrapper_sproc(
1273
+ session,
1251
1274
  query,
1252
1275
  stage_score_file_name,
1253
1276
  identifier.get_unescaped_names(self.input_cols),
1254
1277
  identifier.get_unescaped_names(self.label_cols),
1255
1278
  identifier.get_unescaped_names(self.sample_weight_col),
1256
- statement_params=statement_params,
1279
+ statement_params,
1257
1280
  )
1258
1281
 
1259
1282
  cleanup_temp_files([local_score_file_name])
@@ -1271,18 +1294,20 @@ class SpectralClustering(BaseTransformer):
1271
1294
  if self._sklearn_object._estimator_type == 'classifier':
1272
1295
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1273
1296
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1274
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1297
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1298
+ ([] if self._drop_input_cols else inputs) + outputs)
1275
1299
  # For regressor, the type of predict is float64
1276
1300
  elif self._sklearn_object._estimator_type == 'regressor':
1277
1301
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1278
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1279
-
1302
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1303
+ ([] if self._drop_input_cols else inputs) + outputs)
1280
1304
  for prob_func in PROB_FUNCTIONS:
1281
1305
  if hasattr(self, prob_func):
1282
1306
  output_cols_prefix: str = f"{prob_func}_"
1283
1307
  output_column_names = self._get_output_column_names(output_cols_prefix)
1284
1308
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1285
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1309
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1310
+ ([] if self._drop_input_cols else inputs) + outputs)
1286
1311
 
1287
1312
  @property
1288
1313
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -223,7 +225,6 @@ class SpectralCoclustering(BaseTransformer):
223
225
  sample_weight_col: Optional[str] = None,
224
226
  ) -> None:
225
227
  super().__init__()
226
- self.id = str(uuid4()).replace("-", "_").upper()
227
228
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
228
229
 
229
230
  self._deps = list(deps)
@@ -249,6 +250,15 @@ class SpectralCoclustering(BaseTransformer):
249
250
  self.set_drop_input_cols(drop_input_cols)
250
251
  self.set_sample_weight_col(sample_weight_col)
251
252
 
253
+ def _get_rand_id(self) -> str:
254
+ """
255
+ Generate random id to be used in sproc and stage names.
256
+
257
+ Returns:
258
+ Random id string usable in sproc, table, and stage names.
259
+ """
260
+ return str(uuid4()).replace("-", "_").upper()
261
+
252
262
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
253
263
  """
254
264
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -327,7 +337,7 @@ class SpectralCoclustering(BaseTransformer):
327
337
  cp.dump(self._sklearn_object, local_transform_file)
328
338
 
329
339
  # Create temp stage to run fit.
330
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
340
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
331
341
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
332
342
  SqlResultValidator(
333
343
  session=session,
@@ -340,11 +350,12 @@ class SpectralCoclustering(BaseTransformer):
340
350
  expected_value=f"Stage area {transform_stage_name} successfully created."
341
351
  ).validate()
342
352
 
343
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
353
+ # Use posixpath to construct stage paths
354
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
355
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
344
356
  local_result_file_name = get_temp_file_path()
345
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
346
357
 
347
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
358
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
348
359
  statement_params = telemetry.get_function_usage_statement_params(
349
360
  project=_PROJECT,
350
361
  subproject=_SUBPROJECT,
@@ -370,6 +381,7 @@ class SpectralCoclustering(BaseTransformer):
370
381
  replace=True,
371
382
  session=session,
372
383
  statement_params=statement_params,
384
+ anonymous=True
373
385
  )
374
386
  def fit_wrapper_sproc(
375
387
  session: Session,
@@ -378,7 +390,8 @@ class SpectralCoclustering(BaseTransformer):
378
390
  stage_result_file_name: str,
379
391
  input_cols: List[str],
380
392
  label_cols: List[str],
381
- sample_weight_col: Optional[str]
393
+ sample_weight_col: Optional[str],
394
+ statement_params: Dict[str, str]
382
395
  ) -> str:
383
396
  import cloudpickle as cp
384
397
  import numpy as np
@@ -445,15 +458,15 @@ class SpectralCoclustering(BaseTransformer):
445
458
  api_calls=[Session.call],
446
459
  custom_tags=dict([("autogen", True)]),
447
460
  )
448
- sproc_export_file_name = session.call(
449
- fit_sproc_name,
461
+ sproc_export_file_name = fit_wrapper_sproc(
462
+ session,
450
463
  query,
451
464
  stage_transform_file_name,
452
465
  stage_result_file_name,
453
466
  identifier.get_unescaped_names(self.input_cols),
454
467
  identifier.get_unescaped_names(self.label_cols),
455
468
  identifier.get_unescaped_names(self.sample_weight_col),
456
- statement_params=statement_params,
469
+ statement_params,
457
470
  )
458
471
 
459
472
  if "|" in sproc_export_file_name:
@@ -463,7 +476,7 @@ class SpectralCoclustering(BaseTransformer):
463
476
  print("\n".join(fields[1:]))
464
477
 
465
478
  session.file.get(
466
- os.path.join(stage_result_file_name, sproc_export_file_name),
479
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
467
480
  local_result_file_name,
468
481
  statement_params=statement_params
469
482
  )
@@ -509,7 +522,7 @@ class SpectralCoclustering(BaseTransformer):
509
522
 
510
523
  # Register vectorized UDF for batch inference
511
524
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
512
- safe_id=self.id, method=inference_method)
525
+ safe_id=self._get_rand_id(), method=inference_method)
513
526
 
514
527
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
515
528
  # will try to pickle all of self which fails.
@@ -601,7 +614,7 @@ class SpectralCoclustering(BaseTransformer):
601
614
  return transformed_pandas_df.to_dict("records")
602
615
 
603
616
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
604
- safe_id=self.id
617
+ safe_id=self._get_rand_id()
605
618
  )
606
619
 
607
620
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -766,11 +779,18 @@ class SpectralCoclustering(BaseTransformer):
766
779
  Transformed dataset.
767
780
  """
768
781
  if isinstance(dataset, DataFrame):
782
+ expected_type_inferred = ""
783
+ # when it is classifier, infer the datatype from label columns
784
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
785
+ expected_type_inferred = convert_sp_to_sf_type(
786
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
787
+ )
788
+
769
789
  output_df = self._batch_inference(
770
790
  dataset=dataset,
771
791
  inference_method="predict",
772
792
  expected_output_cols_list=self.output_cols,
773
- expected_output_cols_type="",
793
+ expected_output_cols_type=expected_type_inferred,
774
794
  )
775
795
  elif isinstance(dataset, pd.DataFrame):
776
796
  output_df = self._sklearn_inference(
@@ -841,10 +861,10 @@ class SpectralCoclustering(BaseTransformer):
841
861
 
842
862
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
843
863
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
844
- Returns an empty list if current object is not a classifier or not yet fitted.
864
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
845
865
  """
846
866
  if getattr(self._sklearn_object, "classes_", None) is None:
847
- return []
867
+ return [output_cols_prefix]
848
868
 
849
869
  classes = self._sklearn_object.classes_
850
870
  if isinstance(classes, numpy.ndarray):
@@ -1069,7 +1089,7 @@ class SpectralCoclustering(BaseTransformer):
1069
1089
  cp.dump(self._sklearn_object, local_score_file)
1070
1090
 
1071
1091
  # Create temp stage to run score.
1072
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1092
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1073
1093
  session = dataset._session
1074
1094
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1075
1095
  SqlResultValidator(
@@ -1083,8 +1103,9 @@ class SpectralCoclustering(BaseTransformer):
1083
1103
  expected_value=f"Stage area {score_stage_name} successfully created."
1084
1104
  ).validate()
1085
1105
 
1086
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1087
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1106
+ # Use posixpath to construct stage paths
1107
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1108
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1088
1109
  statement_params = telemetry.get_function_usage_statement_params(
1089
1110
  project=_PROJECT,
1090
1111
  subproject=_SUBPROJECT,
@@ -1110,6 +1131,7 @@ class SpectralCoclustering(BaseTransformer):
1110
1131
  replace=True,
1111
1132
  session=session,
1112
1133
  statement_params=statement_params,
1134
+ anonymous=True
1113
1135
  )
1114
1136
  def score_wrapper_sproc(
1115
1137
  session: Session,
@@ -1117,7 +1139,8 @@ class SpectralCoclustering(BaseTransformer):
1117
1139
  stage_score_file_name: str,
1118
1140
  input_cols: List[str],
1119
1141
  label_cols: List[str],
1120
- sample_weight_col: Optional[str]
1142
+ sample_weight_col: Optional[str],
1143
+ statement_params: Dict[str, str]
1121
1144
  ) -> float:
1122
1145
  import cloudpickle as cp
1123
1146
  import numpy as np
@@ -1167,14 +1190,14 @@ class SpectralCoclustering(BaseTransformer):
1167
1190
  api_calls=[Session.call],
1168
1191
  custom_tags=dict([("autogen", True)]),
1169
1192
  )
1170
- score = session.call(
1171
- score_sproc_name,
1193
+ score = score_wrapper_sproc(
1194
+ session,
1172
1195
  query,
1173
1196
  stage_score_file_name,
1174
1197
  identifier.get_unescaped_names(self.input_cols),
1175
1198
  identifier.get_unescaped_names(self.label_cols),
1176
1199
  identifier.get_unescaped_names(self.sample_weight_col),
1177
- statement_params=statement_params,
1200
+ statement_params,
1178
1201
  )
1179
1202
 
1180
1203
  cleanup_temp_files([local_score_file_name])
@@ -1192,18 +1215,20 @@ class SpectralCoclustering(BaseTransformer):
1192
1215
  if self._sklearn_object._estimator_type == 'classifier':
1193
1216
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1194
1217
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1195
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1218
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1219
+ ([] if self._drop_input_cols else inputs) + outputs)
1196
1220
  # For regressor, the type of predict is float64
1197
1221
  elif self._sklearn_object._estimator_type == 'regressor':
1198
1222
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1199
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1200
-
1223
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1224
+ ([] if self._drop_input_cols else inputs) + outputs)
1201
1225
  for prob_func in PROB_FUNCTIONS:
1202
1226
  if hasattr(self, prob_func):
1203
1227
  output_cols_prefix: str = f"{prob_func}_"
1204
1228
  output_column_names = self._get_output_column_names(output_cols_prefix)
1205
1229
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1206
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1230
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1231
+ ([] if self._drop_input_cols else inputs) + outputs)
1207
1232
 
1208
1233
  @property
1209
1234
  def model_signatures(self) -> Dict[str, ModelSignature]: