snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
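
Nearly every modeling/ entry above shows the identical +51/-26 count, matching the shared autogenerated-estimator template (note the custom_tags=dict([("autogen", True)]) telemetry tag in the hunks below); the two kernel_approximation diffs reproduced below show that template change in full. One part of it removes the per-instance self.id assigned once in __init__ in favor of a _get_rand_id() helper invoked at every use site, so each temporary stage, sproc, and table gets a freshly generated name. A standalone sketch of the helper, copied from the hunks below, with an illustrative output value:

    from uuid import uuid4

    def _get_rand_id() -> str:
        # Same transformation as in the diff: uuid4's hyphens are not
        # valid in unquoted Snowflake identifiers, so they become
        # underscores, and the result is uppercased.
        return str(uuid4()).replace("-", "_").upper()

    print(_get_rand_id())
    # e.g. 7C9E6679_7425_40DE_944B_E07FC1F90AE7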
snowflake/ml/modeling/kernel_approximation/nystroem.py

@@ -7,6 +7,7 @@
  #
  import inspect
  import os
+ import posixpath
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
  from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
  from snowflake.snowpark import DataFrame, Session
  from snowflake.snowpark.functions import pandas_udf, sproc
  from snowflake.snowpark.types import PandasSeries
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

  from snowflake.ml.model.model_signature import (
  DataType,
@@ -229,7 +231,6 @@ class Nystroem(BaseTransformer):
  sample_weight_col: Optional[str] = None,
  ) -> None:
  super().__init__()
- self.id = str(uuid4()).replace("-", "_").upper()
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

  self._deps = list(deps)
@@ -256,6 +257,15 @@ class Nystroem(BaseTransformer):
  self.set_drop_input_cols(drop_input_cols)
  self.set_sample_weight_col(sample_weight_col)

+ def _get_rand_id(self) -> str:
+ """
+ Generate random id to be used in sproc and stage names.
+
+ Returns:
+ Random id string usable in sproc, table, and stage names.
+ """
+ return str(uuid4()).replace("-", "_").upper()
+
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
  """
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -334,7 +344,7 @@ class Nystroem(BaseTransformer):
  cp.dump(self._sklearn_object, local_transform_file)

  # Create temp stage to run fit.
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
  SqlResultValidator(
  session=session,
@@ -347,11 +357,12 @@ class Nystroem(BaseTransformer):
  expected_value=f"Stage area {transform_stage_name} successfully created."
  ).validate()

- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ # Use posixpath to construct stage paths
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
  local_result_file_name = get_temp_file_path()
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -377,6 +388,7 @@ class Nystroem(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def fit_wrapper_sproc(
  session: Session,
@@ -385,7 +397,8 @@ class Nystroem(BaseTransformer):
  stage_result_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> str:
  import cloudpickle as cp
  import numpy as np
@@ -452,15 +465,15 @@ class Nystroem(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- sproc_export_file_name = session.call(
- fit_sproc_name,
+ sproc_export_file_name = fit_wrapper_sproc(
+ session,
  query,
  stage_transform_file_name,
  stage_result_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )

  if "|" in sproc_export_file_name:
@@ -470,7 +483,7 @@ class Nystroem(BaseTransformer):
  print("\n".join(fields[1:]))

  session.file.get(
- os.path.join(stage_result_file_name, sproc_export_file_name),
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
  local_result_file_name,
  statement_params=statement_params
  )
@@ -516,7 +529,7 @@ class Nystroem(BaseTransformer):

  # Register vectorized UDF for batch inference
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
- safe_id=self.id, method=inference_method)
+ safe_id=self._get_rand_id(), method=inference_method)

  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
  # will try to pickle all of self which fails.
@@ -608,7 +621,7 @@ class Nystroem(BaseTransformer):
  return transformed_pandas_df.to_dict("records")

  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
- safe_id=self.id
+ safe_id=self._get_rand_id()
  )

  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -773,11 +786,18 @@ class Nystroem(BaseTransformer):
  Transformed dataset.
  """
  if isinstance(dataset, DataFrame):
+ expected_type_inferred = ""
+ # when it is classifier, infer the datatype from label columns
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
+ expected_type_inferred = convert_sp_to_sf_type(
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
+ )
+
  output_df = self._batch_inference(
  dataset=dataset,
  inference_method="predict",
  expected_output_cols_list=self.output_cols,
- expected_output_cols_type="",
+ expected_output_cols_type=expected_type_inferred,
  )
  elif isinstance(dataset, pd.DataFrame):
  output_df = self._sklearn_inference(
@@ -850,10 +870,10 @@ class Nystroem(BaseTransformer):

  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns an empty list if current object is not a classifier or not yet fitted.
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
  """
  if getattr(self._sklearn_object, "classes_", None) is None:
- return []
+ return [output_cols_prefix]

  classes = self._sklearn_object.classes_
  if isinstance(classes, numpy.ndarray):
@@ -1078,7 +1098,7 @@ class Nystroem(BaseTransformer):
  cp.dump(self._sklearn_object, local_score_file)

  # Create temp stage to run score.
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  session = dataset._session
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
  SqlResultValidator(
@@ -1092,8 +1112,9 @@ class Nystroem(BaseTransformer):
  expected_value=f"Stage area {score_stage_name} successfully created."
  ).validate()

- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ # Use posixpath to construct stage paths
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -1119,6 +1140,7 @@ class Nystroem(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def score_wrapper_sproc(
  session: Session,
@@ -1126,7 +1148,8 @@ class Nystroem(BaseTransformer):
  stage_score_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> float:
  import cloudpickle as cp
  import numpy as np
@@ -1176,14 +1199,14 @@ class Nystroem(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- score = session.call(
- score_sproc_name,
+ score = score_wrapper_sproc(
+ session,
  query,
  stage_score_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )

  cleanup_temp_files([local_score_file_name])
@@ -1201,18 +1224,20 @@ class Nystroem(BaseTransformer):
  if self._sklearn_object._estimator_type == 'classifier':
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  # For regressor, the type of predict is float64
  elif self._sklearn_object._estimator_type == 'regressor':
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  for prob_func in PROB_FUNCTIONS:
  if hasattr(self, prob_func):
  output_cols_prefix: str = f"{prob_func}_"
  output_column_names = self._get_output_column_names(output_cols_prefix)
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)

  @property
  def model_signatures(self) -> Dict[str, ModelSignature]:
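
Another recurring change in the hunks above swaps os.path.join for posixpath.join when building stage paths (see the "# Use posixpath to construct stage paths" comments). A minimal sketch of why this matters; the stage and file names are made up, and ntpath stands in for os.path on Windows so the snippet runs anywhere:

    import ntpath      # what os.path resolves to on Windows
    import posixpath   # always uses forward slashes

    stage = "SNOWML_TRANSFORM_ABC123"
    fname = "model.pkl"

    # On Windows, os.path.join yields a backslash-separated path, which
    # is not a valid Snowflake stage path:
    print(ntpath.join(stage, fname))     # SNOWML_TRANSFORM_ABC123\model.pkl
    # posixpath.join gives the same forward-slash result on every OS:
    print(posixpath.join(stage, fname))  # SNOWML_TRANSFORM_ABC123/model.pkl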
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py

@@ -7,6 +7,7 @@
  #
  import inspect
  import os
+ import posixpath
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
  from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
  from snowflake.snowpark import DataFrame, Session
  from snowflake.snowpark.functions import pandas_udf, sproc
  from snowflake.snowpark.types import PandasSeries
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

  from snowflake.ml.model.model_signature import (
  DataType,
@@ -208,7 +210,6 @@ class PolynomialCountSketch(BaseTransformer):
  sample_weight_col: Optional[str] = None,
  ) -> None:
  super().__init__()
- self.id = str(uuid4()).replace("-", "_").upper()
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

  self._deps = list(deps)
@@ -232,6 +233,15 @@ class PolynomialCountSketch(BaseTransformer):
  self.set_drop_input_cols(drop_input_cols)
  self.set_sample_weight_col(sample_weight_col)

+ def _get_rand_id(self) -> str:
+ """
+ Generate random id to be used in sproc and stage names.
+
+ Returns:
+ Random id string usable in sproc, table, and stage names.
+ """
+ return str(uuid4()).replace("-", "_").upper()
+
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
  """
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -310,7 +320,7 @@ class PolynomialCountSketch(BaseTransformer):
  cp.dump(self._sklearn_object, local_transform_file)

  # Create temp stage to run fit.
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
  SqlResultValidator(
  session=session,
@@ -323,11 +333,12 @@ class PolynomialCountSketch(BaseTransformer):
  expected_value=f"Stage area {transform_stage_name} successfully created."
  ).validate()

- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ # Use posixpath to construct stage paths
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
  local_result_file_name = get_temp_file_path()
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -353,6 +364,7 @@ class PolynomialCountSketch(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def fit_wrapper_sproc(
  session: Session,
@@ -361,7 +373,8 @@ class PolynomialCountSketch(BaseTransformer):
  stage_result_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> str:
  import cloudpickle as cp
  import numpy as np
@@ -428,15 +441,15 @@ class PolynomialCountSketch(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- sproc_export_file_name = session.call(
- fit_sproc_name,
+ sproc_export_file_name = fit_wrapper_sproc(
+ session,
  query,
  stage_transform_file_name,
  stage_result_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )

  if "|" in sproc_export_file_name:
@@ -446,7 +459,7 @@ class PolynomialCountSketch(BaseTransformer):
  print("\n".join(fields[1:]))

  session.file.get(
- os.path.join(stage_result_file_name, sproc_export_file_name),
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
  local_result_file_name,
  statement_params=statement_params
  )
@@ -492,7 +505,7 @@ class PolynomialCountSketch(BaseTransformer):

  # Register vectorized UDF for batch inference
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
- safe_id=self.id, method=inference_method)
+ safe_id=self._get_rand_id(), method=inference_method)

  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
  # will try to pickle all of self which fails.
@@ -584,7 +597,7 @@ class PolynomialCountSketch(BaseTransformer):
  return transformed_pandas_df.to_dict("records")

  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
- safe_id=self.id
+ safe_id=self._get_rand_id()
  )

  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -749,11 +762,18 @@ class PolynomialCountSketch(BaseTransformer):
  Transformed dataset.
  """
  if isinstance(dataset, DataFrame):
+ expected_type_inferred = ""
+ # when it is classifier, infer the datatype from label columns
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
+ expected_type_inferred = convert_sp_to_sf_type(
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
+ )
+
  output_df = self._batch_inference(
  dataset=dataset,
  inference_method="predict",
  expected_output_cols_list=self.output_cols,
- expected_output_cols_type="",
+ expected_output_cols_type=expected_type_inferred,
  )
  elif isinstance(dataset, pd.DataFrame):
  output_df = self._sklearn_inference(
@@ -826,10 +846,10 @@ class PolynomialCountSketch(BaseTransformer):

  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns an empty list if current object is not a classifier or not yet fitted.
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
  """
  if getattr(self._sklearn_object, "classes_", None) is None:
- return []
+ return [output_cols_prefix]

  classes = self._sklearn_object.classes_
  if isinstance(classes, numpy.ndarray):
@@ -1054,7 +1074,7 @@ class PolynomialCountSketch(BaseTransformer):
  cp.dump(self._sklearn_object, local_score_file)

  # Create temp stage to run score.
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  session = dataset._session
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
  SqlResultValidator(
@@ -1068,8 +1088,9 @@ class PolynomialCountSketch(BaseTransformer):
  expected_value=f"Stage area {score_stage_name} successfully created."
  ).validate()

- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ # Use posixpath to construct stage paths
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -1095,6 +1116,7 @@ class PolynomialCountSketch(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def score_wrapper_sproc(
  session: Session,
@@ -1102,7 +1124,8 @@ class PolynomialCountSketch(BaseTransformer):
  stage_score_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> float:
  import cloudpickle as cp
  import numpy as np
@@ -1152,14 +1175,14 @@ class PolynomialCountSketch(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- score = session.call(
- score_sproc_name,
+ score = score_wrapper_sproc(
+ session,
  query,
  stage_score_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )

  cleanup_temp_files([local_score_file_name])
@@ -1177,18 +1200,20 @@ class PolynomialCountSketch(BaseTransformer):
  if self._sklearn_object._estimator_type == 'classifier':
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  # For regressor, the type of predict is float64
  elif self._sklearn_object._estimator_type == 'regressor':
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  for prob_func in PROB_FUNCTIONS:
  if hasattr(self, prob_func):
  output_cols_prefix: str = f"{prob_func}_"
  output_column_names = self._get_output_column_names(output_cols_prefix)
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)

  @property
  def model_signatures(self) -> Dict[str, ModelSignature]:
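
Both files also stop passing an empty expected_output_cols_type to _batch_inference for predict(): when a 'predict' model signature exists, the Snowflake SQL type is now derived from the signature's first output column via the newly imported convert_sp_to_sf_type. A rough sketch of that helper in isolation; note it lives under snowflake.snowpark._internal, so it is private API that may move between snowpark-python releases:

    from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
    from snowflake.snowpark.types import DoubleType, LongType

    # Maps a Snowpark DataType to the name of the corresponding Snowflake
    # SQL type; the diffs feed it
    # self.model_signatures['predict'].outputs[0].as_snowpark_type().
    print(convert_sp_to_sf_type(DoubleType()))
    print(convert_sp_to_sf_type(LongType()))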