snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -264,7 +266,6 @@ class SGDOneClassSVM(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -295,6 +296,15 @@ class SGDOneClassSVM(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
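The hunks above replace the fixed per-instance self.id with a _get_rand_id() helper, so each fit/score/inference call derives fresh stage, table, and sproc names instead of reusing one id for the lifetime of the estimator. A tiny stand-alone sketch of the id format (the printed value is only an example):

    from uuid import uuid4

    # Same expression as the new helper: a UUID with hyphens replaced by
    # underscores and upper-cased, safe to embed in stage/table/sproc names.
    rand_id = str(uuid4()).replace("-", "_").upper()
    print(rand_id)  # e.g. 0D4F2E6A_1B3C_4D5E_8F90_123456789ABC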
@@ -373,7 +383,7 @@ class SGDOneClassSVM(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -386,11 +396,12 @@ class SGDOneClassSVM(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
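This hunk (and the later ones like it) replaces os.path.join with posixpath.join wherever a stage path is built. Stage paths use "/" as the separator, whereas os.path.join would emit "\" on a Windows client. A minimal stand-alone sketch of the difference (the stage and file names here are made up for illustration):

    import ntpath      # what os.path resolves to on Windows
    import posixpath

    stage_name = "SNOWML_TRANSFORM_ABC123"
    file_name = "model.pkl.gz"

    # On Windows, os.path.join produces a backslash-separated path, which is not
    # a valid stage location; posixpath.join always produces "/"-separated paths.
    print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl.gz
    print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl.gz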
@@ -416,6 +427,7 @@ class SGDOneClassSVM(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -424,7 +436,8 @@ class SGDOneClassSVM(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -491,15 +504,15 @@ class SGDOneClassSVM(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -509,7 +522,7 @@ class SGDOneClassSVM(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -555,7 +568,7 @@ class SGDOneClassSVM(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -647,7 +660,7 @@ class SGDOneClassSVM(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -814,11 +827,18 @@ class SGDOneClassSVM(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
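The hunk above stops hard-coding the expected output column type and instead derives it from the Snowpark type recorded in the model signature. A rough sketch of that mapping, assuming a signature is already available (convert_sp_to_sf_type is an internal Snowpark helper, so the exact strings it returns are an implementation detail):

    from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
    from snowflake.snowpark.types import DoubleType, LongType

    # Map a Snowpark DataType to the Snowflake SQL type name used when the
    # batch-inference UDF declares its output column type.
    print(convert_sp_to_sf_type(LongType()))    # e.g. BIGINT
    print(convert_sp_to_sf_type(DoubleType()))  # e.g. DOUBLE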
@@ -889,10 +909,10 @@ class SGDOneClassSVM(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1119,7 +1139,7 @@ class SGDOneClassSVM(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1133,8 +1153,9 @@ class SGDOneClassSVM(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1160,6 +1181,7 @@ class SGDOneClassSVM(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1167,7 +1189,8 @@ class SGDOneClassSVM(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1217,14 +1240,14 @@ class SGDOneClassSVM(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1242,18 +1265,20 @@ class SGDOneClassSVM(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                        ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
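The final hunk changes how signatures are assembled: unless the transformer was created with drop_input_cols, each operation's declared outputs now include the input features followed by the prediction columns. A small sketch of that construction with made-up feature names (FeatureSpec and ModelSignature come from snowflake.ml.model.model_signature, as imported in the file):

    from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

    inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="FEATURE_0")]
    outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_0")]
    drop_input_cols = False

    # Mirrors the expression added in the diff: inputs are echoed into the
    # declared outputs unless drop_input_cols was requested.
    sig = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
    print(len(sig.outputs))  # 2: the echoed input plus the prediction column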
snowflake/ml/modeling/linear_model/sgd_regressor.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -323,7 +325,6 @@ class SGDRegressor(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

         self._deps = list(deps)
@@ -361,6 +362,15 @@ class SGDRegressor(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -439,7 +449,7 @@ class SGDRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -452,11 +462,12 @@ class SGDRegressor(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -482,6 +493,7 @@ class SGDRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -490,7 +502,8 @@ class SGDRegressor(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -557,15 +570,15 @@ class SGDRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -575,7 +588,7 @@ class SGDRegressor(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -621,7 +634,7 @@ class SGDRegressor(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -713,7 +726,7 @@ class SGDRegressor(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -880,11 +893,18 @@ class SGDRegressor(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -955,10 +975,10 @@ class SGDRegressor(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1183,7 +1203,7 @@ class SGDRegressor(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1197,8 +1217,9 @@ class SGDRegressor(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1224,6 +1245,7 @@ class SGDRegressor(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1231,7 +1253,8 @@ class SGDRegressor(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1281,14 +1304,14 @@ class SGDRegressor(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
        )

         cleanup_temp_files([local_score_file_name])
@@ -1306,18 +1329,20 @@ class SGDRegressor(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                        ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: