snowflake-ml-python 1.0.2-py3-none-any.whl → 1.0.3-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/cluster/optics.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -299,7 +301,6 @@ class OPTICS(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -332,6 +333,15 @@ class OPTICS(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
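
The per-instance `self.id` set once in `__init__` is replaced by a `_get_rand_id()` helper, so every generated stage, sproc, and table name gets a fresh id per call instead of reusing one id for the lifetime of the estimator. A standalone sketch of what the helper produces (the module-level function name is ours):

    from uuid import uuid4

    def get_rand_id() -> str:
        # Hyphens become underscores and the result is upper-cased so the id
        # can be embedded in unquoted Snowflake object names.
        return str(uuid4()).replace("-", "_").upper()

    print(get_rand_id())  # e.g. 7F9C2BA4_E88F_4AB4_97AA_0F4F2ACA1C55
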
@@ -410,7 +420,7 @@ class OPTICS(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -423,11 +433,12 @@ class OPTICS(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
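
Stage paths are now built with `posixpath.join` rather than `os.path.join`: Snowflake stage locations always use forward slashes, while `os.path` follows the host OS, so on Windows the old code produced backslash-separated paths that are not valid stage references. A runnable illustration, using `ntpath` to stand in for Windows `os.path` (stage and file names are made up):

    import ntpath      # the os.path implementation used on Windows
    import posixpath   # what the generated code now uses

    stage_name = "SNOWML_TRANSFORM_ABC123"
    file_name = "model.pkl"

    print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl
    print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl
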
@@ -453,6 +464,7 @@ class OPTICS(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -461,7 +473,8 @@ class OPTICS(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -528,15 +541,15 @@ class OPTICS(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )


@@ -546,7 +559,7 @@ class OPTICS(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -592,7 +605,7 @@ class OPTICS(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -684,7 +697,7 @@ class OPTICS(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -849,11 +862,18 @@ class OPTICS(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
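
With the signature-derived type available, `predict` on a Snowpark DataFrame now requests a concrete output column type instead of an empty string, using the Snowpark-internal `convert_sp_to_sf_type` imported at the top of the file. A quick check of that mapping, assuming snowflake-snowpark-python is installed; since the helper is internal, its exact return strings are not guaranteed across releases:

    from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
    from snowflake.snowpark.types import DoubleType, LongType, StringType

    # Converts a Snowpark DataType into the Snowflake SQL type name used in DDL.
    print(convert_sp_to_sf_type(DoubleType()))  # expected: DOUBLE
    print(convert_sp_to_sf_type(LongType()))    # expected: BIGINT
    print(convert_sp_to_sf_type(StringType()))  # expected: STRING
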
@@ -924,10 +944,10 @@ class OPTICS(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
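
For estimators without a fitted `classes_` attribute, `_get_output_column_names` now returns the bare prefix as a single column name rather than an empty list, so `decision_function`-style methods still yield one output column. A standalone rendering of the new behavior; the per-class suffixing branch is our reading of the surrounding, unchanged code:

    from typing import List, Optional, Sequence

    def get_output_column_names(classes: Optional[Sequence], prefix: str) -> List[str]:
        if classes is None:
            return [prefix]  # new behavior; previously returned []
        return [f"{prefix}{c}" for c in classes]

    print(get_output_column_names(None, "decision_function_"))  # ['decision_function_']
    print(get_output_column_names([0, 1], "predict_proba_"))    # ['predict_proba_0', 'predict_proba_1']
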
@@ -1152,7 +1172,7 @@ class OPTICS(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1166,8 +1186,9 @@ class OPTICS(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1193,6 +1214,7 @@ class OPTICS(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1200,7 +1222,8 @@ class OPTICS(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1250,14 +1273,14 @@ class OPTICS(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1275,18 +1298,20 @@ class OPTICS(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
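
Each method signature now prepends the input features to its declared outputs unless `drop_input_cols` is set, so the signature matches batch inference, which passes input columns through to the result. A minimal sketch with the package's public signature types (feature names are made up):

    from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

    inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="FEATURE_0")]
    outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_0")]
    drop_input_cols = False

    # Same expression as the generated code above.
    sig = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
    print([f.name for f in sig.outputs])  # ['FEATURE_0', 'OUTPUT_0']

The same set of changes is applied to snowflake/ml/modeling/cluster/spectral_biclustering.py below; judging by the uniform +51 −26 counts in the file list, it appears to repeat across the autogenerated estimator wrappers.
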
snowflake/ml/modeling/cluster/spectral_biclustering.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -241,7 +243,6 @@ class SpectralBiclustering(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -270,6 +271,15 @@ class SpectralBiclustering(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -348,7 +358,7 @@ class SpectralBiclustering(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -361,11 +371,12 @@ class SpectralBiclustering(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -391,6 +402,7 @@ class SpectralBiclustering(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -399,7 +411,8 @@ class SpectralBiclustering(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -466,15 +479,15 @@ class SpectralBiclustering(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )


@@ -484,7 +497,7 @@ class SpectralBiclustering(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -530,7 +543,7 @@ class SpectralBiclustering(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -622,7 +635,7 @@ class SpectralBiclustering(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -787,11 +800,18 @@ class SpectralBiclustering(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -862,10 +882,10 @@ class SpectralBiclustering(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1090,7 +1110,7 @@ class SpectralBiclustering(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1104,8 +1124,9 @@ class SpectralBiclustering(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1131,6 +1152,7 @@ class SpectralBiclustering(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1138,7 +1160,8 @@ class SpectralBiclustering(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1188,14 +1211,14 @@ class SpectralBiclustering(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1213,18 +1236,20 @@ class SpectralBiclustering(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: