snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -237,7 +239,6 @@ class NuSVR(BaseTransformer):
237
239
  sample_weight_col: Optional[str] = None,
238
240
  ) -> None:
239
241
  super().__init__()
240
- self.id = str(uuid4()).replace("-", "_").upper()
241
242
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
242
243
 
243
244
  self._deps = list(deps)
@@ -267,6 +268,15 @@ class NuSVR(BaseTransformer):
267
268
  self.set_drop_input_cols(drop_input_cols)
268
269
  self.set_sample_weight_col(sample_weight_col)
269
270
 
271
+ def _get_rand_id(self) -> str:
272
+ """
273
+ Generate random id to be used in sproc and stage names.
274
+
275
+ Returns:
276
+ Random id string usable in sproc, table, and stage names.
277
+ """
278
+ return str(uuid4()).replace("-", "_").upper()
279
+
270
280
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
271
281
  """
272
282
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -345,7 +355,7 @@ class NuSVR(BaseTransformer):
345
355
  cp.dump(self._sklearn_object, local_transform_file)
346
356
 
347
357
  # Create temp stage to run fit.
348
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
358
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
349
359
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
350
360
  SqlResultValidator(
351
361
  session=session,
@@ -358,11 +368,12 @@ class NuSVR(BaseTransformer):
358
368
  expected_value=f"Stage area {transform_stage_name} successfully created."
359
369
  ).validate()
360
370
 
361
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
371
+ # Use posixpath to construct stage paths
372
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
373
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
362
374
  local_result_file_name = get_temp_file_path()
363
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
364
375
 
365
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
376
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
366
377
  statement_params = telemetry.get_function_usage_statement_params(
367
378
  project=_PROJECT,
368
379
  subproject=_SUBPROJECT,
@@ -388,6 +399,7 @@ class NuSVR(BaseTransformer):
388
399
  replace=True,
389
400
  session=session,
390
401
  statement_params=statement_params,
402
+ anonymous=True
391
403
  )
392
404
  def fit_wrapper_sproc(
393
405
  session: Session,
@@ -396,7 +408,8 @@ class NuSVR(BaseTransformer):
396
408
  stage_result_file_name: str,
397
409
  input_cols: List[str],
398
410
  label_cols: List[str],
399
- sample_weight_col: Optional[str]
411
+ sample_weight_col: Optional[str],
412
+ statement_params: Dict[str, str]
400
413
  ) -> str:
401
414
  import cloudpickle as cp
402
415
  import numpy as np
@@ -463,15 +476,15 @@ class NuSVR(BaseTransformer):
463
476
  api_calls=[Session.call],
464
477
  custom_tags=dict([("autogen", True)]),
465
478
  )
466
- sproc_export_file_name = session.call(
467
- fit_sproc_name,
479
+ sproc_export_file_name = fit_wrapper_sproc(
480
+ session,
468
481
  query,
469
482
  stage_transform_file_name,
470
483
  stage_result_file_name,
471
484
  identifier.get_unescaped_names(self.input_cols),
472
485
  identifier.get_unescaped_names(self.label_cols),
473
486
  identifier.get_unescaped_names(self.sample_weight_col),
474
- statement_params=statement_params,
487
+ statement_params,
475
488
  )
476
489
 
477
490
  if "|" in sproc_export_file_name:
@@ -481,7 +494,7 @@ class NuSVR(BaseTransformer):
481
494
  print("\n".join(fields[1:]))
482
495
 
483
496
  session.file.get(
484
- os.path.join(stage_result_file_name, sproc_export_file_name),
497
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
485
498
  local_result_file_name,
486
499
  statement_params=statement_params
487
500
  )
@@ -527,7 +540,7 @@ class NuSVR(BaseTransformer):
527
540
 
528
541
  # Register vectorized UDF for batch inference
529
542
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
530
- safe_id=self.id, method=inference_method)
543
+ safe_id=self._get_rand_id(), method=inference_method)
531
544
 
532
545
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
533
546
  # will try to pickle all of self which fails.
@@ -619,7 +632,7 @@ class NuSVR(BaseTransformer):
619
632
  return transformed_pandas_df.to_dict("records")
620
633
 
621
634
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
622
- safe_id=self.id
635
+ safe_id=self._get_rand_id()
623
636
  )
624
637
 
625
638
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -786,11 +799,18 @@ class NuSVR(BaseTransformer):
786
799
  Transformed dataset.
787
800
  """
788
801
  if isinstance(dataset, DataFrame):
802
+ expected_type_inferred = "float"
803
+ # when it is classifier, infer the datatype from label columns
804
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
805
+ expected_type_inferred = convert_sp_to_sf_type(
806
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
807
+ )
808
+
789
809
  output_df = self._batch_inference(
790
810
  dataset=dataset,
791
811
  inference_method="predict",
792
812
  expected_output_cols_list=self.output_cols,
793
- expected_output_cols_type="float",
813
+ expected_output_cols_type=expected_type_inferred,
794
814
  )
795
815
  elif isinstance(dataset, pd.DataFrame):
796
816
  output_df = self._sklearn_inference(
@@ -861,10 +881,10 @@ class NuSVR(BaseTransformer):
861
881
 
862
882
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
863
883
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
864
- Returns an empty list if current object is not a classifier or not yet fitted.
884
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
865
885
  """
866
886
  if getattr(self._sklearn_object, "classes_", None) is None:
867
- return []
887
+ return [output_cols_prefix]
868
888
 
869
889
  classes = self._sklearn_object.classes_
870
890
  if isinstance(classes, numpy.ndarray):
@@ -1089,7 +1109,7 @@ class NuSVR(BaseTransformer):
1089
1109
  cp.dump(self._sklearn_object, local_score_file)
1090
1110
 
1091
1111
  # Create temp stage to run score.
1092
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1112
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1093
1113
  session = dataset._session
1094
1114
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1095
1115
  SqlResultValidator(
@@ -1103,8 +1123,9 @@ class NuSVR(BaseTransformer):
1103
1123
  expected_value=f"Stage area {score_stage_name} successfully created."
1104
1124
  ).validate()
1105
1125
 
1106
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1107
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1126
+ # Use posixpath to construct stage paths
1127
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1128
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1108
1129
  statement_params = telemetry.get_function_usage_statement_params(
1109
1130
  project=_PROJECT,
1110
1131
  subproject=_SUBPROJECT,
@@ -1130,6 +1151,7 @@ class NuSVR(BaseTransformer):
1130
1151
  replace=True,
1131
1152
  session=session,
1132
1153
  statement_params=statement_params,
1154
+ anonymous=True
1133
1155
  )
1134
1156
  def score_wrapper_sproc(
1135
1157
  session: Session,
@@ -1137,7 +1159,8 @@ class NuSVR(BaseTransformer):
1137
1159
  stage_score_file_name: str,
1138
1160
  input_cols: List[str],
1139
1161
  label_cols: List[str],
1140
- sample_weight_col: Optional[str]
1162
+ sample_weight_col: Optional[str],
1163
+ statement_params: Dict[str, str]
1141
1164
  ) -> float:
1142
1165
  import cloudpickle as cp
1143
1166
  import numpy as np
@@ -1187,14 +1210,14 @@ class NuSVR(BaseTransformer):
1187
1210
  api_calls=[Session.call],
1188
1211
  custom_tags=dict([("autogen", True)]),
1189
1212
  )
1190
- score = session.call(
1191
- score_sproc_name,
1213
+ score = score_wrapper_sproc(
1214
+ session,
1192
1215
  query,
1193
1216
  stage_score_file_name,
1194
1217
  identifier.get_unescaped_names(self.input_cols),
1195
1218
  identifier.get_unescaped_names(self.label_cols),
1196
1219
  identifier.get_unescaped_names(self.sample_weight_col),
1197
- statement_params=statement_params,
1220
+ statement_params,
1198
1221
  )
1199
1222
 
1200
1223
  cleanup_temp_files([local_score_file_name])
@@ -1212,18 +1235,20 @@ class NuSVR(BaseTransformer):
1212
1235
  if self._sklearn_object._estimator_type == 'classifier':
1213
1236
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1214
1237
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1215
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1238
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1239
+ ([] if self._drop_input_cols else inputs) + outputs)
1216
1240
  # For regressor, the type of predict is float64
1217
1241
  elif self._sklearn_object._estimator_type == 'regressor':
1218
1242
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1219
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1220
-
1243
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1244
+ ([] if self._drop_input_cols else inputs) + outputs)
1221
1245
  for prob_func in PROB_FUNCTIONS:
1222
1246
  if hasattr(self, prob_func):
1223
1247
  output_cols_prefix: str = f"{prob_func}_"
1224
1248
  output_column_names = self._get_output_column_names(output_cols_prefix)
1225
1249
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1226
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1250
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1251
+ ([] if self._drop_input_cols else inputs) + outputs)
1227
1252
 
1228
1253
  @property
1229
1254
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -275,7 +277,6 @@ class SVC(BaseTransformer):
275
277
  sample_weight_col: Optional[str] = None,
276
278
  ) -> None:
277
279
  super().__init__()
278
- self.id = str(uuid4()).replace("-", "_").upper()
279
280
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
280
281
 
281
282
  self._deps = list(deps)
@@ -309,6 +310,15 @@ class SVC(BaseTransformer):
309
310
  self.set_drop_input_cols(drop_input_cols)
310
311
  self.set_sample_weight_col(sample_weight_col)
311
312
 
313
+ def _get_rand_id(self) -> str:
314
+ """
315
+ Generate random id to be used in sproc and stage names.
316
+
317
+ Returns:
318
+ Random id string usable in sproc, table, and stage names.
319
+ """
320
+ return str(uuid4()).replace("-", "_").upper()
321
+
312
322
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
313
323
  """
314
324
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -387,7 +397,7 @@ class SVC(BaseTransformer):
387
397
  cp.dump(self._sklearn_object, local_transform_file)
388
398
 
389
399
  # Create temp stage to run fit.
390
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
400
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
391
401
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
392
402
  SqlResultValidator(
393
403
  session=session,
@@ -400,11 +410,12 @@ class SVC(BaseTransformer):
400
410
  expected_value=f"Stage area {transform_stage_name} successfully created."
401
411
  ).validate()
402
412
 
403
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
413
+ # Use posixpath to construct stage paths
414
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
415
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
404
416
  local_result_file_name = get_temp_file_path()
405
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
406
417
 
407
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
418
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
408
419
  statement_params = telemetry.get_function_usage_statement_params(
409
420
  project=_PROJECT,
410
421
  subproject=_SUBPROJECT,
@@ -430,6 +441,7 @@ class SVC(BaseTransformer):
430
441
  replace=True,
431
442
  session=session,
432
443
  statement_params=statement_params,
444
+ anonymous=True
433
445
  )
434
446
  def fit_wrapper_sproc(
435
447
  session: Session,
@@ -438,7 +450,8 @@ class SVC(BaseTransformer):
438
450
  stage_result_file_name: str,
439
451
  input_cols: List[str],
440
452
  label_cols: List[str],
441
- sample_weight_col: Optional[str]
453
+ sample_weight_col: Optional[str],
454
+ statement_params: Dict[str, str]
442
455
  ) -> str:
443
456
  import cloudpickle as cp
444
457
  import numpy as np
@@ -505,15 +518,15 @@ class SVC(BaseTransformer):
505
518
  api_calls=[Session.call],
506
519
  custom_tags=dict([("autogen", True)]),
507
520
  )
508
- sproc_export_file_name = session.call(
509
- fit_sproc_name,
521
+ sproc_export_file_name = fit_wrapper_sproc(
522
+ session,
510
523
  query,
511
524
  stage_transform_file_name,
512
525
  stage_result_file_name,
513
526
  identifier.get_unescaped_names(self.input_cols),
514
527
  identifier.get_unescaped_names(self.label_cols),
515
528
  identifier.get_unescaped_names(self.sample_weight_col),
516
- statement_params=statement_params,
529
+ statement_params,
517
530
  )
518
531
 
519
532
  if "|" in sproc_export_file_name:
@@ -523,7 +536,7 @@ class SVC(BaseTransformer):
523
536
  print("\n".join(fields[1:]))
524
537
 
525
538
  session.file.get(
526
- os.path.join(stage_result_file_name, sproc_export_file_name),
539
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
527
540
  local_result_file_name,
528
541
  statement_params=statement_params
529
542
  )
@@ -569,7 +582,7 @@ class SVC(BaseTransformer):
569
582
 
570
583
  # Register vectorized UDF for batch inference
571
584
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
572
- safe_id=self.id, method=inference_method)
585
+ safe_id=self._get_rand_id(), method=inference_method)
573
586
 
574
587
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
575
588
  # will try to pickle all of self which fails.
@@ -661,7 +674,7 @@ class SVC(BaseTransformer):
661
674
  return transformed_pandas_df.to_dict("records")
662
675
 
663
676
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
664
- safe_id=self.id
677
+ safe_id=self._get_rand_id()
665
678
  )
666
679
 
667
680
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -828,11 +841,18 @@ class SVC(BaseTransformer):
828
841
  Transformed dataset.
829
842
  """
830
843
  if isinstance(dataset, DataFrame):
844
+ expected_type_inferred = ""
845
+ # when it is classifier, infer the datatype from label columns
846
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
847
+ expected_type_inferred = convert_sp_to_sf_type(
848
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
849
+ )
850
+
831
851
  output_df = self._batch_inference(
832
852
  dataset=dataset,
833
853
  inference_method="predict",
834
854
  expected_output_cols_list=self.output_cols,
835
- expected_output_cols_type="",
855
+ expected_output_cols_type=expected_type_inferred,
836
856
  )
837
857
  elif isinstance(dataset, pd.DataFrame):
838
858
  output_df = self._sklearn_inference(
@@ -903,10 +923,10 @@ class SVC(BaseTransformer):
903
923
 
904
924
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
905
925
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
906
- Returns an empty list if current object is not a classifier or not yet fitted.
926
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
907
927
  """
908
928
  if getattr(self._sklearn_object, "classes_", None) is None:
909
- return []
929
+ return [output_cols_prefix]
910
930
 
911
931
  classes = self._sklearn_object.classes_
912
932
  if isinstance(classes, numpy.ndarray):
@@ -1137,7 +1157,7 @@ class SVC(BaseTransformer):
1137
1157
  cp.dump(self._sklearn_object, local_score_file)
1138
1158
 
1139
1159
  # Create temp stage to run score.
1140
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1160
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1141
1161
  session = dataset._session
1142
1162
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1143
1163
  SqlResultValidator(
@@ -1151,8 +1171,9 @@ class SVC(BaseTransformer):
1151
1171
  expected_value=f"Stage area {score_stage_name} successfully created."
1152
1172
  ).validate()
1153
1173
 
1154
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1155
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1174
+ # Use posixpath to construct stage paths
1175
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1176
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1156
1177
  statement_params = telemetry.get_function_usage_statement_params(
1157
1178
  project=_PROJECT,
1158
1179
  subproject=_SUBPROJECT,
@@ -1178,6 +1199,7 @@ class SVC(BaseTransformer):
1178
1199
  replace=True,
1179
1200
  session=session,
1180
1201
  statement_params=statement_params,
1202
+ anonymous=True
1181
1203
  )
1182
1204
  def score_wrapper_sproc(
1183
1205
  session: Session,
@@ -1185,7 +1207,8 @@ class SVC(BaseTransformer):
1185
1207
  stage_score_file_name: str,
1186
1208
  input_cols: List[str],
1187
1209
  label_cols: List[str],
1188
- sample_weight_col: Optional[str]
1210
+ sample_weight_col: Optional[str],
1211
+ statement_params: Dict[str, str]
1189
1212
  ) -> float:
1190
1213
  import cloudpickle as cp
1191
1214
  import numpy as np
@@ -1235,14 +1258,14 @@ class SVC(BaseTransformer):
1235
1258
  api_calls=[Session.call],
1236
1259
  custom_tags=dict([("autogen", True)]),
1237
1260
  )
1238
- score = session.call(
1239
- score_sproc_name,
1261
+ score = score_wrapper_sproc(
1262
+ session,
1240
1263
  query,
1241
1264
  stage_score_file_name,
1242
1265
  identifier.get_unescaped_names(self.input_cols),
1243
1266
  identifier.get_unescaped_names(self.label_cols),
1244
1267
  identifier.get_unescaped_names(self.sample_weight_col),
1245
- statement_params=statement_params,
1268
+ statement_params,
1246
1269
  )
1247
1270
 
1248
1271
  cleanup_temp_files([local_score_file_name])
@@ -1260,18 +1283,20 @@ class SVC(BaseTransformer):
1260
1283
  if self._sklearn_object._estimator_type == 'classifier':
1261
1284
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1262
1285
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1263
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1286
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1287
+ ([] if self._drop_input_cols else inputs) + outputs)
1264
1288
  # For regressor, the type of predict is float64
1265
1289
  elif self._sklearn_object._estimator_type == 'regressor':
1266
1290
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1267
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1268
-
1291
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1292
+ ([] if self._drop_input_cols else inputs) + outputs)
1269
1293
  for prob_func in PROB_FUNCTIONS:
1270
1294
  if hasattr(self, prob_func):
1271
1295
  output_cols_prefix: str = f"{prob_func}_"
1272
1296
  output_column_names = self._get_output_column_names(output_cols_prefix)
1273
1297
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1274
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1298
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1299
+ ([] if self._drop_input_cols else inputs) + outputs)
1275
1300
 
1276
1301
  @property
1277
1302
  def model_signatures(self) -> Dict[str, ModelSignature]: