snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the differences between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -239,7 +241,6 @@ class LinearSVR(BaseTransformer):
239
241
  sample_weight_col: Optional[str] = None,
240
242
  ) -> None:
241
243
  super().__init__()
242
- self.id = str(uuid4()).replace("-", "_").upper()
243
244
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
244
245
 
245
246
  self._deps = list(deps)
@@ -268,6 +269,15 @@ class LinearSVR(BaseTransformer):
268
269
  self.set_drop_input_cols(drop_input_cols)
269
270
  self.set_sample_weight_col(sample_weight_col)
270
271
 
272
+ def _get_rand_id(self) -> str:
273
+ """
274
+ Generate random id to be used in sproc and stage names.
275
+
276
+ Returns:
277
+ Random id string usable in sproc, table, and stage names.
278
+ """
279
+ return str(uuid4()).replace("-", "_").upper()
280
+
271
281
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
272
282
  """
273
283
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -346,7 +356,7 @@ class LinearSVR(BaseTransformer):
346
356
  cp.dump(self._sklearn_object, local_transform_file)
347
357
 
348
358
  # Create temp stage to run fit.
349
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
359
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
350
360
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
351
361
  SqlResultValidator(
352
362
  session=session,
@@ -359,11 +369,12 @@ class LinearSVR(BaseTransformer):
359
369
  expected_value=f"Stage area {transform_stage_name} successfully created."
360
370
  ).validate()
361
371
 
362
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
372
+ # Use posixpath to construct stage paths
373
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
374
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
363
375
  local_result_file_name = get_temp_file_path()
364
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
365
376
 
366
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
377
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
367
378
  statement_params = telemetry.get_function_usage_statement_params(
368
379
  project=_PROJECT,
369
380
  subproject=_SUBPROJECT,
@@ -389,6 +400,7 @@ class LinearSVR(BaseTransformer):
389
400
  replace=True,
390
401
  session=session,
391
402
  statement_params=statement_params,
403
+ anonymous=True
392
404
  )
393
405
  def fit_wrapper_sproc(
394
406
  session: Session,
@@ -397,7 +409,8 @@ class LinearSVR(BaseTransformer):
397
409
  stage_result_file_name: str,
398
410
  input_cols: List[str],
399
411
  label_cols: List[str],
400
- sample_weight_col: Optional[str]
412
+ sample_weight_col: Optional[str],
413
+ statement_params: Dict[str, str]
401
414
  ) -> str:
402
415
  import cloudpickle as cp
403
416
  import numpy as np
@@ -464,15 +477,15 @@ class LinearSVR(BaseTransformer):
464
477
  api_calls=[Session.call],
465
478
  custom_tags=dict([("autogen", True)]),
466
479
  )
467
- sproc_export_file_name = session.call(
468
- fit_sproc_name,
480
+ sproc_export_file_name = fit_wrapper_sproc(
481
+ session,
469
482
  query,
470
483
  stage_transform_file_name,
471
484
  stage_result_file_name,
472
485
  identifier.get_unescaped_names(self.input_cols),
473
486
  identifier.get_unescaped_names(self.label_cols),
474
487
  identifier.get_unescaped_names(self.sample_weight_col),
475
- statement_params=statement_params,
488
+ statement_params,
476
489
  )
477
490
 
478
491
  if "|" in sproc_export_file_name:
@@ -482,7 +495,7 @@ class LinearSVR(BaseTransformer):
482
495
  print("\n".join(fields[1:]))
483
496
 
484
497
  session.file.get(
485
- os.path.join(stage_result_file_name, sproc_export_file_name),
498
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
486
499
  local_result_file_name,
487
500
  statement_params=statement_params
488
501
  )
@@ -528,7 +541,7 @@ class LinearSVR(BaseTransformer):
528
541
 
529
542
  # Register vectorized UDF for batch inference
530
543
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
531
- safe_id=self.id, method=inference_method)
544
+ safe_id=self._get_rand_id(), method=inference_method)
532
545
 
533
546
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
534
547
  # will try to pickle all of self which fails.
@@ -620,7 +633,7 @@ class LinearSVR(BaseTransformer):
620
633
  return transformed_pandas_df.to_dict("records")
621
634
 
622
635
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
623
- safe_id=self.id
636
+ safe_id=self._get_rand_id()
624
637
  )
625
638
 
626
639
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -787,11 +800,18 @@ class LinearSVR(BaseTransformer):
787
800
  Transformed dataset.
788
801
  """
789
802
  if isinstance(dataset, DataFrame):
803
+ expected_type_inferred = "float"
804
+ # when it is classifier, infer the datatype from label columns
805
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
806
+ expected_type_inferred = convert_sp_to_sf_type(
807
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
808
+ )
809
+
790
810
  output_df = self._batch_inference(
791
811
  dataset=dataset,
792
812
  inference_method="predict",
793
813
  expected_output_cols_list=self.output_cols,
794
- expected_output_cols_type="float",
814
+ expected_output_cols_type=expected_type_inferred,
795
815
  )
796
816
  elif isinstance(dataset, pd.DataFrame):
797
817
  output_df = self._sklearn_inference(
@@ -862,10 +882,10 @@ class LinearSVR(BaseTransformer):
862
882
 
863
883
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
864
884
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
865
- Returns an empty list if current object is not a classifier or not yet fitted.
885
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
866
886
  """
867
887
  if getattr(self._sklearn_object, "classes_", None) is None:
868
- return []
888
+ return [output_cols_prefix]
869
889
 
870
890
  classes = self._sklearn_object.classes_
871
891
  if isinstance(classes, numpy.ndarray):
@@ -1090,7 +1110,7 @@ class LinearSVR(BaseTransformer):
1090
1110
  cp.dump(self._sklearn_object, local_score_file)
1091
1111
 
1092
1112
  # Create temp stage to run score.
1093
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1113
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1094
1114
  session = dataset._session
1095
1115
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1096
1116
  SqlResultValidator(
@@ -1104,8 +1124,9 @@ class LinearSVR(BaseTransformer):
1104
1124
  expected_value=f"Stage area {score_stage_name} successfully created."
1105
1125
  ).validate()
1106
1126
 
1107
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1108
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1127
+ # Use posixpath to construct stage paths
1128
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1129
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1109
1130
  statement_params = telemetry.get_function_usage_statement_params(
1110
1131
  project=_PROJECT,
1111
1132
  subproject=_SUBPROJECT,
@@ -1131,6 +1152,7 @@ class LinearSVR(BaseTransformer):
1131
1152
  replace=True,
1132
1153
  session=session,
1133
1154
  statement_params=statement_params,
1155
+ anonymous=True
1134
1156
  )
1135
1157
  def score_wrapper_sproc(
1136
1158
  session: Session,
@@ -1138,7 +1160,8 @@ class LinearSVR(BaseTransformer):
1138
1160
  stage_score_file_name: str,
1139
1161
  input_cols: List[str],
1140
1162
  label_cols: List[str],
1141
- sample_weight_col: Optional[str]
1163
+ sample_weight_col: Optional[str],
1164
+ statement_params: Dict[str, str]
1142
1165
  ) -> float:
1143
1166
  import cloudpickle as cp
1144
1167
  import numpy as np
@@ -1188,14 +1211,14 @@ class LinearSVR(BaseTransformer):
1188
1211
  api_calls=[Session.call],
1189
1212
  custom_tags=dict([("autogen", True)]),
1190
1213
  )
1191
- score = session.call(
1192
- score_sproc_name,
1214
+ score = score_wrapper_sproc(
1215
+ session,
1193
1216
  query,
1194
1217
  stage_score_file_name,
1195
1218
  identifier.get_unescaped_names(self.input_cols),
1196
1219
  identifier.get_unescaped_names(self.label_cols),
1197
1220
  identifier.get_unescaped_names(self.sample_weight_col),
1198
- statement_params=statement_params,
1221
+ statement_params,
1199
1222
  )
1200
1223
 
1201
1224
  cleanup_temp_files([local_score_file_name])
@@ -1213,18 +1236,20 @@ class LinearSVR(BaseTransformer):
1213
1236
  if self._sklearn_object._estimator_type == 'classifier':
1214
1237
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1215
1238
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1216
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1239
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1240
+ ([] if self._drop_input_cols else inputs) + outputs)
1217
1241
  # For regressor, the type of predict is float64
1218
1242
  elif self._sklearn_object._estimator_type == 'regressor':
1219
1243
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1220
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1221
-
1244
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1245
+ ([] if self._drop_input_cols else inputs) + outputs)
1222
1246
  for prob_func in PROB_FUNCTIONS:
1223
1247
  if hasattr(self, prob_func):
1224
1248
  output_cols_prefix: str = f"{prob_func}_"
1225
1249
  output_column_names = self._get_output_column_names(output_cols_prefix)
1226
1250
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1227
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1251
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1252
+ ([] if self._drop_input_cols else inputs) + outputs)
1228
1253
 
1229
1254
  @property
1230
1255
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -272,7 +274,6 @@ class NuSVC(BaseTransformer):
272
274
  sample_weight_col: Optional[str] = None,
273
275
  ) -> None:
274
276
  super().__init__()
275
- self.id = str(uuid4()).replace("-", "_").upper()
276
277
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
277
278
 
278
279
  self._deps = list(deps)
@@ -306,6 +307,15 @@ class NuSVC(BaseTransformer):
306
307
  self.set_drop_input_cols(drop_input_cols)
307
308
  self.set_sample_weight_col(sample_weight_col)
308
309
 
310
+ def _get_rand_id(self) -> str:
311
+ """
312
+ Generate random id to be used in sproc and stage names.
313
+
314
+ Returns:
315
+ Random id string usable in sproc, table, and stage names.
316
+ """
317
+ return str(uuid4()).replace("-", "_").upper()
318
+
309
319
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
310
320
  """
311
321
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -384,7 +394,7 @@ class NuSVC(BaseTransformer):
384
394
  cp.dump(self._sklearn_object, local_transform_file)
385
395
 
386
396
  # Create temp stage to run fit.
387
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
397
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
388
398
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
389
399
  SqlResultValidator(
390
400
  session=session,
@@ -397,11 +407,12 @@ class NuSVC(BaseTransformer):
397
407
  expected_value=f"Stage area {transform_stage_name} successfully created."
398
408
  ).validate()
399
409
 
400
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
410
+ # Use posixpath to construct stage paths
411
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
412
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
401
413
  local_result_file_name = get_temp_file_path()
402
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
403
414
 
404
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
415
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
405
416
  statement_params = telemetry.get_function_usage_statement_params(
406
417
  project=_PROJECT,
407
418
  subproject=_SUBPROJECT,
@@ -427,6 +438,7 @@ class NuSVC(BaseTransformer):
427
438
  replace=True,
428
439
  session=session,
429
440
  statement_params=statement_params,
441
+ anonymous=True
430
442
  )
431
443
  def fit_wrapper_sproc(
432
444
  session: Session,
@@ -435,7 +447,8 @@ class NuSVC(BaseTransformer):
435
447
  stage_result_file_name: str,
436
448
  input_cols: List[str],
437
449
  label_cols: List[str],
438
- sample_weight_col: Optional[str]
450
+ sample_weight_col: Optional[str],
451
+ statement_params: Dict[str, str]
439
452
  ) -> str:
440
453
  import cloudpickle as cp
441
454
  import numpy as np
@@ -502,15 +515,15 @@ class NuSVC(BaseTransformer):
502
515
  api_calls=[Session.call],
503
516
  custom_tags=dict([("autogen", True)]),
504
517
  )
505
- sproc_export_file_name = session.call(
506
- fit_sproc_name,
518
+ sproc_export_file_name = fit_wrapper_sproc(
519
+ session,
507
520
  query,
508
521
  stage_transform_file_name,
509
522
  stage_result_file_name,
510
523
  identifier.get_unescaped_names(self.input_cols),
511
524
  identifier.get_unescaped_names(self.label_cols),
512
525
  identifier.get_unescaped_names(self.sample_weight_col),
513
- statement_params=statement_params,
526
+ statement_params,
514
527
  )
515
528
 
516
529
  if "|" in sproc_export_file_name:
@@ -520,7 +533,7 @@ class NuSVC(BaseTransformer):
520
533
  print("\n".join(fields[1:]))
521
534
 
522
535
  session.file.get(
523
- os.path.join(stage_result_file_name, sproc_export_file_name),
536
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
524
537
  local_result_file_name,
525
538
  statement_params=statement_params
526
539
  )
@@ -566,7 +579,7 @@ class NuSVC(BaseTransformer):
566
579
 
567
580
  # Register vectorized UDF for batch inference
568
581
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
569
- safe_id=self.id, method=inference_method)
582
+ safe_id=self._get_rand_id(), method=inference_method)
570
583
 
571
584
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
572
585
  # will try to pickle all of self which fails.
@@ -658,7 +671,7 @@ class NuSVC(BaseTransformer):
658
671
  return transformed_pandas_df.to_dict("records")
659
672
 
660
673
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
661
- safe_id=self.id
674
+ safe_id=self._get_rand_id()
662
675
  )
663
676
 
664
677
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -825,11 +838,18 @@ class NuSVC(BaseTransformer):
825
838
  Transformed dataset.
826
839
  """
827
840
  if isinstance(dataset, DataFrame):
841
+ expected_type_inferred = ""
842
+ # when it is classifier, infer the datatype from label columns
843
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
844
+ expected_type_inferred = convert_sp_to_sf_type(
845
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
846
+ )
847
+
828
848
  output_df = self._batch_inference(
829
849
  dataset=dataset,
830
850
  inference_method="predict",
831
851
  expected_output_cols_list=self.output_cols,
832
- expected_output_cols_type="",
852
+ expected_output_cols_type=expected_type_inferred,
833
853
  )
834
854
  elif isinstance(dataset, pd.DataFrame):
835
855
  output_df = self._sklearn_inference(
@@ -900,10 +920,10 @@ class NuSVC(BaseTransformer):
900
920
 
901
921
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
902
922
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
903
- Returns an empty list if current object is not a classifier or not yet fitted.
923
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
904
924
  """
905
925
  if getattr(self._sklearn_object, "classes_", None) is None:
906
- return []
926
+ return [output_cols_prefix]
907
927
 
908
928
  classes = self._sklearn_object.classes_
909
929
  if isinstance(classes, numpy.ndarray):
@@ -1134,7 +1154,7 @@ class NuSVC(BaseTransformer):
1134
1154
  cp.dump(self._sklearn_object, local_score_file)
1135
1155
 
1136
1156
  # Create temp stage to run score.
1137
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1157
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1138
1158
  session = dataset._session
1139
1159
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1140
1160
  SqlResultValidator(
@@ -1148,8 +1168,9 @@ class NuSVC(BaseTransformer):
1148
1168
  expected_value=f"Stage area {score_stage_name} successfully created."
1149
1169
  ).validate()
1150
1170
 
1151
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1152
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1171
+ # Use posixpath to construct stage paths
1172
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1173
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1153
1174
  statement_params = telemetry.get_function_usage_statement_params(
1154
1175
  project=_PROJECT,
1155
1176
  subproject=_SUBPROJECT,
@@ -1175,6 +1196,7 @@ class NuSVC(BaseTransformer):
1175
1196
  replace=True,
1176
1197
  session=session,
1177
1198
  statement_params=statement_params,
1199
+ anonymous=True
1178
1200
  )
1179
1201
  def score_wrapper_sproc(
1180
1202
  session: Session,
@@ -1182,7 +1204,8 @@ class NuSVC(BaseTransformer):
1182
1204
  stage_score_file_name: str,
1183
1205
  input_cols: List[str],
1184
1206
  label_cols: List[str],
1185
- sample_weight_col: Optional[str]
1207
+ sample_weight_col: Optional[str],
1208
+ statement_params: Dict[str, str]
1186
1209
  ) -> float:
1187
1210
  import cloudpickle as cp
1188
1211
  import numpy as np
@@ -1232,14 +1255,14 @@ class NuSVC(BaseTransformer):
1232
1255
  api_calls=[Session.call],
1233
1256
  custom_tags=dict([("autogen", True)]),
1234
1257
  )
1235
- score = session.call(
1236
- score_sproc_name,
1258
+ score = score_wrapper_sproc(
1259
+ session,
1237
1260
  query,
1238
1261
  stage_score_file_name,
1239
1262
  identifier.get_unescaped_names(self.input_cols),
1240
1263
  identifier.get_unescaped_names(self.label_cols),
1241
1264
  identifier.get_unescaped_names(self.sample_weight_col),
1242
- statement_params=statement_params,
1265
+ statement_params,
1243
1266
  )
1244
1267
 
1245
1268
  cleanup_temp_files([local_score_file_name])
@@ -1257,18 +1280,20 @@ class NuSVC(BaseTransformer):
1257
1280
  if self._sklearn_object._estimator_type == 'classifier':
1258
1281
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1259
1282
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1260
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1283
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1284
+ ([] if self._drop_input_cols else inputs) + outputs)
1261
1285
  # For regressor, the type of predict is float64
1262
1286
  elif self._sklearn_object._estimator_type == 'regressor':
1263
1287
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1264
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1265
-
1288
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1289
+ ([] if self._drop_input_cols else inputs) + outputs)
1266
1290
  for prob_func in PROB_FUNCTIONS:
1267
1291
  if hasattr(self, prob_func):
1268
1292
  output_cols_prefix: str = f"{prob_func}_"
1269
1293
  output_column_names = self._get_output_column_names(output_cols_prefix)
1270
1294
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1271
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1295
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1296
+ ([] if self._drop_input_cols else inputs) + outputs)
1272
1297
 
1273
1298
  @property
1274
1299
  def model_signatures(self) -> Dict[str, ModelSignature]: