snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -256,7 +258,6 @@ class Isomap(BaseTransformer):
256
258
  sample_weight_col: Optional[str] = None,
257
259
  ) -> None:
258
260
  super().__init__()
259
- self.id = str(uuid4()).replace("-", "_").upper()
260
261
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
261
262
 
262
263
  self._deps = list(deps)
@@ -287,6 +288,15 @@ class Isomap(BaseTransformer):
287
288
  self.set_drop_input_cols(drop_input_cols)
288
289
  self.set_sample_weight_col(sample_weight_col)
289
290
 
291
+ def _get_rand_id(self) -> str:
292
+ """
293
+ Generate random id to be used in sproc and stage names.
294
+
295
+ Returns:
296
+ Random id string usable in sproc, table, and stage names.
297
+ """
298
+ return str(uuid4()).replace("-", "_").upper()
299
+
290
300
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
291
301
  """
292
302
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -365,7 +375,7 @@ class Isomap(BaseTransformer):
365
375
  cp.dump(self._sklearn_object, local_transform_file)
366
376
 
367
377
  # Create temp stage to run fit.
368
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
378
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
369
379
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
370
380
  SqlResultValidator(
371
381
  session=session,
@@ -378,11 +388,12 @@ class Isomap(BaseTransformer):
378
388
  expected_value=f"Stage area {transform_stage_name} successfully created."
379
389
  ).validate()
380
390
 
381
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
391
+ # Use posixpath to construct stage paths
392
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
393
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
382
394
  local_result_file_name = get_temp_file_path()
383
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
384
395
 
385
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
396
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
386
397
  statement_params = telemetry.get_function_usage_statement_params(
387
398
  project=_PROJECT,
388
399
  subproject=_SUBPROJECT,
@@ -408,6 +419,7 @@ class Isomap(BaseTransformer):
408
419
  replace=True,
409
420
  session=session,
410
421
  statement_params=statement_params,
422
+ anonymous=True
411
423
  )
412
424
  def fit_wrapper_sproc(
413
425
  session: Session,
@@ -416,7 +428,8 @@ class Isomap(BaseTransformer):
416
428
  stage_result_file_name: str,
417
429
  input_cols: List[str],
418
430
  label_cols: List[str],
419
- sample_weight_col: Optional[str]
431
+ sample_weight_col: Optional[str],
432
+ statement_params: Dict[str, str]
420
433
  ) -> str:
421
434
  import cloudpickle as cp
422
435
  import numpy as np
@@ -483,15 +496,15 @@ class Isomap(BaseTransformer):
483
496
  api_calls=[Session.call],
484
497
  custom_tags=dict([("autogen", True)]),
485
498
  )
486
- sproc_export_file_name = session.call(
487
- fit_sproc_name,
499
+ sproc_export_file_name = fit_wrapper_sproc(
500
+ session,
488
501
  query,
489
502
  stage_transform_file_name,
490
503
  stage_result_file_name,
491
504
  identifier.get_unescaped_names(self.input_cols),
492
505
  identifier.get_unescaped_names(self.label_cols),
493
506
  identifier.get_unescaped_names(self.sample_weight_col),
494
- statement_params=statement_params,
507
+ statement_params,
495
508
  )
496
509
 
497
510
  if "|" in sproc_export_file_name:
@@ -501,7 +514,7 @@ class Isomap(BaseTransformer):
501
514
  print("\n".join(fields[1:]))
502
515
 
503
516
  session.file.get(
504
- os.path.join(stage_result_file_name, sproc_export_file_name),
517
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
505
518
  local_result_file_name,
506
519
  statement_params=statement_params
507
520
  )
@@ -547,7 +560,7 @@ class Isomap(BaseTransformer):
547
560
 
548
561
  # Register vectorized UDF for batch inference
549
562
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
550
- safe_id=self.id, method=inference_method)
563
+ safe_id=self._get_rand_id(), method=inference_method)
551
564
 
552
565
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
553
566
  # will try to pickle all of self which fails.
@@ -639,7 +652,7 @@ class Isomap(BaseTransformer):
639
652
  return transformed_pandas_df.to_dict("records")
640
653
 
641
654
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
642
- safe_id=self.id
655
+ safe_id=self._get_rand_id()
643
656
  )
644
657
 
645
658
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -804,11 +817,18 @@ class Isomap(BaseTransformer):
804
817
  Transformed dataset.
805
818
  """
806
819
  if isinstance(dataset, DataFrame):
820
+ expected_type_inferred = ""
821
+ # when it is classifier, infer the datatype from label columns
822
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
823
+ expected_type_inferred = convert_sp_to_sf_type(
824
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
825
+ )
826
+
807
827
  output_df = self._batch_inference(
808
828
  dataset=dataset,
809
829
  inference_method="predict",
810
830
  expected_output_cols_list=self.output_cols,
811
- expected_output_cols_type="",
831
+ expected_output_cols_type=expected_type_inferred,
812
832
  )
813
833
  elif isinstance(dataset, pd.DataFrame):
814
834
  output_df = self._sklearn_inference(
@@ -881,10 +901,10 @@ class Isomap(BaseTransformer):
881
901
 
882
902
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
883
903
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
884
- Returns an empty list if current object is not a classifier or not yet fitted.
904
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
885
905
  """
886
906
  if getattr(self._sklearn_object, "classes_", None) is None:
887
- return []
907
+ return [output_cols_prefix]
888
908
 
889
909
  classes = self._sklearn_object.classes_
890
910
  if isinstance(classes, numpy.ndarray):
@@ -1109,7 +1129,7 @@ class Isomap(BaseTransformer):
1109
1129
  cp.dump(self._sklearn_object, local_score_file)
1110
1130
 
1111
1131
  # Create temp stage to run score.
1112
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1132
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1113
1133
  session = dataset._session
1114
1134
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1115
1135
  SqlResultValidator(
@@ -1123,8 +1143,9 @@ class Isomap(BaseTransformer):
1123
1143
  expected_value=f"Stage area {score_stage_name} successfully created."
1124
1144
  ).validate()
1125
1145
 
1126
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1127
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1146
+ # Use posixpath to construct stage paths
1147
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1148
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1128
1149
  statement_params = telemetry.get_function_usage_statement_params(
1129
1150
  project=_PROJECT,
1130
1151
  subproject=_SUBPROJECT,
@@ -1150,6 +1171,7 @@ class Isomap(BaseTransformer):
1150
1171
  replace=True,
1151
1172
  session=session,
1152
1173
  statement_params=statement_params,
1174
+ anonymous=True
1153
1175
  )
1154
1176
  def score_wrapper_sproc(
1155
1177
  session: Session,
@@ -1157,7 +1179,8 @@ class Isomap(BaseTransformer):
1157
1179
  stage_score_file_name: str,
1158
1180
  input_cols: List[str],
1159
1181
  label_cols: List[str],
1160
- sample_weight_col: Optional[str]
1182
+ sample_weight_col: Optional[str],
1183
+ statement_params: Dict[str, str]
1161
1184
  ) -> float:
1162
1185
  import cloudpickle as cp
1163
1186
  import numpy as np
@@ -1207,14 +1230,14 @@ class Isomap(BaseTransformer):
1207
1230
  api_calls=[Session.call],
1208
1231
  custom_tags=dict([("autogen", True)]),
1209
1232
  )
1210
- score = session.call(
1211
- score_sproc_name,
1233
+ score = score_wrapper_sproc(
1234
+ session,
1212
1235
  query,
1213
1236
  stage_score_file_name,
1214
1237
  identifier.get_unescaped_names(self.input_cols),
1215
1238
  identifier.get_unescaped_names(self.label_cols),
1216
1239
  identifier.get_unescaped_names(self.sample_weight_col),
1217
- statement_params=statement_params,
1240
+ statement_params,
1218
1241
  )
1219
1242
 
1220
1243
  cleanup_temp_files([local_score_file_name])
@@ -1232,18 +1255,20 @@ class Isomap(BaseTransformer):
1232
1255
  if self._sklearn_object._estimator_type == 'classifier':
1233
1256
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1234
1257
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1235
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1258
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1259
+ ([] if self._drop_input_cols else inputs) + outputs)
1236
1260
  # For regressor, the type of predict is float64
1237
1261
  elif self._sklearn_object._estimator_type == 'regressor':
1238
1262
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1239
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1240
-
1263
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1264
+ ([] if self._drop_input_cols else inputs) + outputs)
1241
1265
  for prob_func in PROB_FUNCTIONS:
1242
1266
  if hasattr(self, prob_func):
1243
1267
  output_cols_prefix: str = f"{prob_func}_"
1244
1268
  output_column_names = self._get_output_column_names(output_cols_prefix)
1245
1269
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1246
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1270
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1271
+ ([] if self._drop_input_cols else inputs) + outputs)
1247
1272
 
1248
1273
  @property
1249
1274
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -241,7 +243,6 @@ class MDS(BaseTransformer):
241
243
  sample_weight_col: Optional[str] = None,
242
244
  ) -> None:
243
245
  super().__init__()
244
- self.id = str(uuid4()).replace("-", "_").upper()
245
246
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
246
247
 
247
248
  self._deps = list(deps)
@@ -270,6 +271,15 @@ class MDS(BaseTransformer):
270
271
  self.set_drop_input_cols(drop_input_cols)
271
272
  self.set_sample_weight_col(sample_weight_col)
272
273
 
274
+ def _get_rand_id(self) -> str:
275
+ """
276
+ Generate random id to be used in sproc and stage names.
277
+
278
+ Returns:
279
+ Random id string usable in sproc, table, and stage names.
280
+ """
281
+ return str(uuid4()).replace("-", "_").upper()
282
+
273
283
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
274
284
  """
275
285
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -348,7 +358,7 @@ class MDS(BaseTransformer):
348
358
  cp.dump(self._sklearn_object, local_transform_file)
349
359
 
350
360
  # Create temp stage to run fit.
351
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
361
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
352
362
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
353
363
  SqlResultValidator(
354
364
  session=session,
@@ -361,11 +371,12 @@ class MDS(BaseTransformer):
361
371
  expected_value=f"Stage area {transform_stage_name} successfully created."
362
372
  ).validate()
363
373
 
364
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
374
+ # Use posixpath to construct stage paths
375
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
376
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
365
377
  local_result_file_name = get_temp_file_path()
366
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
367
378
 
368
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
379
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
369
380
  statement_params = telemetry.get_function_usage_statement_params(
370
381
  project=_PROJECT,
371
382
  subproject=_SUBPROJECT,
@@ -391,6 +402,7 @@ class MDS(BaseTransformer):
391
402
  replace=True,
392
403
  session=session,
393
404
  statement_params=statement_params,
405
+ anonymous=True
394
406
  )
395
407
  def fit_wrapper_sproc(
396
408
  session: Session,
@@ -399,7 +411,8 @@ class MDS(BaseTransformer):
399
411
  stage_result_file_name: str,
400
412
  input_cols: List[str],
401
413
  label_cols: List[str],
402
- sample_weight_col: Optional[str]
414
+ sample_weight_col: Optional[str],
415
+ statement_params: Dict[str, str]
403
416
  ) -> str:
404
417
  import cloudpickle as cp
405
418
  import numpy as np
@@ -466,15 +479,15 @@ class MDS(BaseTransformer):
466
479
  api_calls=[Session.call],
467
480
  custom_tags=dict([("autogen", True)]),
468
481
  )
469
- sproc_export_file_name = session.call(
470
- fit_sproc_name,
482
+ sproc_export_file_name = fit_wrapper_sproc(
483
+ session,
471
484
  query,
472
485
  stage_transform_file_name,
473
486
  stage_result_file_name,
474
487
  identifier.get_unescaped_names(self.input_cols),
475
488
  identifier.get_unescaped_names(self.label_cols),
476
489
  identifier.get_unescaped_names(self.sample_weight_col),
477
- statement_params=statement_params,
490
+ statement_params,
478
491
  )
479
492
 
480
493
  if "|" in sproc_export_file_name:
@@ -484,7 +497,7 @@ class MDS(BaseTransformer):
484
497
  print("\n".join(fields[1:]))
485
498
 
486
499
  session.file.get(
487
- os.path.join(stage_result_file_name, sproc_export_file_name),
500
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
488
501
  local_result_file_name,
489
502
  statement_params=statement_params
490
503
  )
@@ -530,7 +543,7 @@ class MDS(BaseTransformer):
530
543
 
531
544
  # Register vectorized UDF for batch inference
532
545
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
533
- safe_id=self.id, method=inference_method)
546
+ safe_id=self._get_rand_id(), method=inference_method)
534
547
 
535
548
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
536
549
  # will try to pickle all of self which fails.
@@ -622,7 +635,7 @@ class MDS(BaseTransformer):
622
635
  return transformed_pandas_df.to_dict("records")
623
636
 
624
637
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
625
- safe_id=self.id
638
+ safe_id=self._get_rand_id()
626
639
  )
627
640
 
628
641
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -787,11 +800,18 @@ class MDS(BaseTransformer):
787
800
  Transformed dataset.
788
801
  """
789
802
  if isinstance(dataset, DataFrame):
803
+ expected_type_inferred = ""
804
+ # when it is classifier, infer the datatype from label columns
805
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
806
+ expected_type_inferred = convert_sp_to_sf_type(
807
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
808
+ )
809
+
790
810
  output_df = self._batch_inference(
791
811
  dataset=dataset,
792
812
  inference_method="predict",
793
813
  expected_output_cols_list=self.output_cols,
794
- expected_output_cols_type="",
814
+ expected_output_cols_type=expected_type_inferred,
795
815
  )
796
816
  elif isinstance(dataset, pd.DataFrame):
797
817
  output_df = self._sklearn_inference(
@@ -862,10 +882,10 @@ class MDS(BaseTransformer):
862
882
 
863
883
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
864
884
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
865
- Returns an empty list if current object is not a classifier or not yet fitted.
885
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
866
886
  """
867
887
  if getattr(self._sklearn_object, "classes_", None) is None:
868
- return []
888
+ return [output_cols_prefix]
869
889
 
870
890
  classes = self._sklearn_object.classes_
871
891
  if isinstance(classes, numpy.ndarray):
@@ -1090,7 +1110,7 @@ class MDS(BaseTransformer):
1090
1110
  cp.dump(self._sklearn_object, local_score_file)
1091
1111
 
1092
1112
  # Create temp stage to run score.
1093
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1113
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1094
1114
  session = dataset._session
1095
1115
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1096
1116
  SqlResultValidator(
@@ -1104,8 +1124,9 @@ class MDS(BaseTransformer):
1104
1124
  expected_value=f"Stage area {score_stage_name} successfully created."
1105
1125
  ).validate()
1106
1126
 
1107
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1108
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1127
+ # Use posixpath to construct stage paths
1128
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1129
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1109
1130
  statement_params = telemetry.get_function_usage_statement_params(
1110
1131
  project=_PROJECT,
1111
1132
  subproject=_SUBPROJECT,
@@ -1131,6 +1152,7 @@ class MDS(BaseTransformer):
1131
1152
  replace=True,
1132
1153
  session=session,
1133
1154
  statement_params=statement_params,
1155
+ anonymous=True
1134
1156
  )
1135
1157
  def score_wrapper_sproc(
1136
1158
  session: Session,
@@ -1138,7 +1160,8 @@ class MDS(BaseTransformer):
1138
1160
  stage_score_file_name: str,
1139
1161
  input_cols: List[str],
1140
1162
  label_cols: List[str],
1141
- sample_weight_col: Optional[str]
1163
+ sample_weight_col: Optional[str],
1164
+ statement_params: Dict[str, str]
1142
1165
  ) -> float:
1143
1166
  import cloudpickle as cp
1144
1167
  import numpy as np
@@ -1188,14 +1211,14 @@ class MDS(BaseTransformer):
1188
1211
  api_calls=[Session.call],
1189
1212
  custom_tags=dict([("autogen", True)]),
1190
1213
  )
1191
- score = session.call(
1192
- score_sproc_name,
1214
+ score = score_wrapper_sproc(
1215
+ session,
1193
1216
  query,
1194
1217
  stage_score_file_name,
1195
1218
  identifier.get_unescaped_names(self.input_cols),
1196
1219
  identifier.get_unescaped_names(self.label_cols),
1197
1220
  identifier.get_unescaped_names(self.sample_weight_col),
1198
- statement_params=statement_params,
1221
+ statement_params,
1199
1222
  )
1200
1223
 
1201
1224
  cleanup_temp_files([local_score_file_name])
@@ -1213,18 +1236,20 @@ class MDS(BaseTransformer):
1213
1236
  if self._sklearn_object._estimator_type == 'classifier':
1214
1237
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1215
1238
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1216
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1239
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1240
+ ([] if self._drop_input_cols else inputs) + outputs)
1217
1241
  # For regressor, the type of predict is float64
1218
1242
  elif self._sklearn_object._estimator_type == 'regressor':
1219
1243
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1220
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1221
-
1244
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1245
+ ([] if self._drop_input_cols else inputs) + outputs)
1222
1246
  for prob_func in PROB_FUNCTIONS:
1223
1247
  if hasattr(self, prob_func):
1224
1248
  output_cols_prefix: str = f"{prob_func}_"
1225
1249
  output_column_names = self._get_output_column_names(output_cols_prefix)
1226
1250
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1227
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1251
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1252
+ ([] if self._drop_input_cols else inputs) + outputs)
1228
1253
 
1229
1254
  @property
1230
1255
  def model_signatures(self) -> Dict[str, ModelSignature]: