snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -250,7 +252,6 @@ class RidgeCV(BaseTransformer):
250
252
  sample_weight_col: Optional[str] = None,
251
253
  ) -> None:
252
254
  super().__init__()
253
- self.id = str(uuid4()).replace("-", "_").upper()
254
255
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
255
256
 
256
257
  self._deps = list(deps)
@@ -276,6 +277,15 @@ class RidgeCV(BaseTransformer):
276
277
  self.set_drop_input_cols(drop_input_cols)
277
278
  self.set_sample_weight_col(sample_weight_col)
278
279
 
280
+ def _get_rand_id(self) -> str:
281
+ """
282
+ Generate random id to be used in sproc and stage names.
283
+
284
+ Returns:
285
+ Random id string usable in sproc, table, and stage names.
286
+ """
287
+ return str(uuid4()).replace("-", "_").upper()
288
+
279
289
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
280
290
  """
281
291
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -354,7 +364,7 @@ class RidgeCV(BaseTransformer):
354
364
  cp.dump(self._sklearn_object, local_transform_file)
355
365
 
356
366
  # Create temp stage to run fit.
357
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
367
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
358
368
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
359
369
  SqlResultValidator(
360
370
  session=session,
@@ -367,11 +377,12 @@ class RidgeCV(BaseTransformer):
367
377
  expected_value=f"Stage area {transform_stage_name} successfully created."
368
378
  ).validate()
369
379
 
370
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
380
+ # Use posixpath to construct stage paths
381
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
382
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
371
383
  local_result_file_name = get_temp_file_path()
372
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
373
384
 
374
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
385
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
375
386
  statement_params = telemetry.get_function_usage_statement_params(
376
387
  project=_PROJECT,
377
388
  subproject=_SUBPROJECT,
@@ -397,6 +408,7 @@ class RidgeCV(BaseTransformer):
397
408
  replace=True,
398
409
  session=session,
399
410
  statement_params=statement_params,
411
+ anonymous=True
400
412
  )
401
413
  def fit_wrapper_sproc(
402
414
  session: Session,
@@ -405,7 +417,8 @@ class RidgeCV(BaseTransformer):
405
417
  stage_result_file_name: str,
406
418
  input_cols: List[str],
407
419
  label_cols: List[str],
408
- sample_weight_col: Optional[str]
420
+ sample_weight_col: Optional[str],
421
+ statement_params: Dict[str, str]
409
422
  ) -> str:
410
423
  import cloudpickle as cp
411
424
  import numpy as np
@@ -472,15 +485,15 @@ class RidgeCV(BaseTransformer):
472
485
  api_calls=[Session.call],
473
486
  custom_tags=dict([("autogen", True)]),
474
487
  )
475
- sproc_export_file_name = session.call(
476
- fit_sproc_name,
488
+ sproc_export_file_name = fit_wrapper_sproc(
489
+ session,
477
490
  query,
478
491
  stage_transform_file_name,
479
492
  stage_result_file_name,
480
493
  identifier.get_unescaped_names(self.input_cols),
481
494
  identifier.get_unescaped_names(self.label_cols),
482
495
  identifier.get_unescaped_names(self.sample_weight_col),
483
- statement_params=statement_params,
496
+ statement_params,
484
497
  )
485
498
 
486
499
  if "|" in sproc_export_file_name:
@@ -490,7 +503,7 @@ class RidgeCV(BaseTransformer):
490
503
  print("\n".join(fields[1:]))
491
504
 
492
505
  session.file.get(
493
- os.path.join(stage_result_file_name, sproc_export_file_name),
506
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
494
507
  local_result_file_name,
495
508
  statement_params=statement_params
496
509
  )
@@ -536,7 +549,7 @@ class RidgeCV(BaseTransformer):
536
549
 
537
550
  # Register vectorized UDF for batch inference
538
551
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
539
- safe_id=self.id, method=inference_method)
552
+ safe_id=self._get_rand_id(), method=inference_method)
540
553
 
541
554
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
542
555
  # will try to pickle all of self which fails.
@@ -628,7 +641,7 @@ class RidgeCV(BaseTransformer):
628
641
  return transformed_pandas_df.to_dict("records")
629
642
 
630
643
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
631
- safe_id=self.id
644
+ safe_id=self._get_rand_id()
632
645
  )
633
646
 
634
647
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -795,11 +808,18 @@ class RidgeCV(BaseTransformer):
795
808
  Transformed dataset.
796
809
  """
797
810
  if isinstance(dataset, DataFrame):
811
+ expected_type_inferred = "float"
812
+ # when it is classifier, infer the datatype from label columns
813
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
814
+ expected_type_inferred = convert_sp_to_sf_type(
815
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
816
+ )
817
+
798
818
  output_df = self._batch_inference(
799
819
  dataset=dataset,
800
820
  inference_method="predict",
801
821
  expected_output_cols_list=self.output_cols,
802
- expected_output_cols_type="float",
822
+ expected_output_cols_type=expected_type_inferred,
803
823
  )
804
824
  elif isinstance(dataset, pd.DataFrame):
805
825
  output_df = self._sklearn_inference(
@@ -870,10 +890,10 @@ class RidgeCV(BaseTransformer):
870
890
 
871
891
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
872
892
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
873
- Returns an empty list if current object is not a classifier or not yet fitted.
893
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
874
894
  """
875
895
  if getattr(self._sklearn_object, "classes_", None) is None:
876
- return []
896
+ return [output_cols_prefix]
877
897
 
878
898
  classes = self._sklearn_object.classes_
879
899
  if isinstance(classes, numpy.ndarray):
@@ -1098,7 +1118,7 @@ class RidgeCV(BaseTransformer):
1098
1118
  cp.dump(self._sklearn_object, local_score_file)
1099
1119
 
1100
1120
  # Create temp stage to run score.
1101
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1121
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1102
1122
  session = dataset._session
1103
1123
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1104
1124
  SqlResultValidator(
@@ -1112,8 +1132,9 @@ class RidgeCV(BaseTransformer):
1112
1132
  expected_value=f"Stage area {score_stage_name} successfully created."
1113
1133
  ).validate()
1114
1134
 
1115
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1116
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1135
+ # Use posixpath to construct stage paths
1136
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1137
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1117
1138
  statement_params = telemetry.get_function_usage_statement_params(
1118
1139
  project=_PROJECT,
1119
1140
  subproject=_SUBPROJECT,
@@ -1139,6 +1160,7 @@ class RidgeCV(BaseTransformer):
1139
1160
  replace=True,
1140
1161
  session=session,
1141
1162
  statement_params=statement_params,
1163
+ anonymous=True
1142
1164
  )
1143
1165
  def score_wrapper_sproc(
1144
1166
  session: Session,
@@ -1146,7 +1168,8 @@ class RidgeCV(BaseTransformer):
1146
1168
  stage_score_file_name: str,
1147
1169
  input_cols: List[str],
1148
1170
  label_cols: List[str],
1149
- sample_weight_col: Optional[str]
1171
+ sample_weight_col: Optional[str],
1172
+ statement_params: Dict[str, str]
1150
1173
  ) -> float:
1151
1174
  import cloudpickle as cp
1152
1175
  import numpy as np
@@ -1196,14 +1219,14 @@ class RidgeCV(BaseTransformer):
1196
1219
  api_calls=[Session.call],
1197
1220
  custom_tags=dict([("autogen", True)]),
1198
1221
  )
1199
- score = session.call(
1200
- score_sproc_name,
1222
+ score = score_wrapper_sproc(
1223
+ session,
1201
1224
  query,
1202
1225
  stage_score_file_name,
1203
1226
  identifier.get_unescaped_names(self.input_cols),
1204
1227
  identifier.get_unescaped_names(self.label_cols),
1205
1228
  identifier.get_unescaped_names(self.sample_weight_col),
1206
- statement_params=statement_params,
1229
+ statement_params,
1207
1230
  )
1208
1231
 
1209
1232
  cleanup_temp_files([local_score_file_name])
@@ -1221,18 +1244,20 @@ class RidgeCV(BaseTransformer):
1221
1244
  if self._sklearn_object._estimator_type == 'classifier':
1222
1245
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1223
1246
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1224
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1247
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1248
+ ([] if self._drop_input_cols else inputs) + outputs)
1225
1249
  # For regressor, the type of predict is float64
1226
1250
  elif self._sklearn_object._estimator_type == 'regressor':
1227
1251
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1228
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1229
-
1252
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1253
+ ([] if self._drop_input_cols else inputs) + outputs)
1230
1254
  for prob_func in PROB_FUNCTIONS:
1231
1255
  if hasattr(self, prob_func):
1232
1256
  output_cols_prefix: str = f"{prob_func}_"
1233
1257
  output_column_names = self._get_output_column_names(output_cols_prefix)
1234
1258
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1235
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1259
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1260
+ ([] if self._drop_input_cols else inputs) + outputs)
1236
1261
 
1237
1262
  @property
1238
1263
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -355,7 +357,6 @@ class SGDClassifier(BaseTransformer):
355
357
  sample_weight_col: Optional[str] = None,
356
358
  ) -> None:
357
359
  super().__init__()
358
- self.id = str(uuid4()).replace("-", "_").upper()
359
360
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
360
361
 
361
362
  self._deps = list(deps)
@@ -395,6 +396,15 @@ class SGDClassifier(BaseTransformer):
395
396
  self.set_drop_input_cols(drop_input_cols)
396
397
  self.set_sample_weight_col(sample_weight_col)
397
398
 
399
+ def _get_rand_id(self) -> str:
400
+ """
401
+ Generate random id to be used in sproc and stage names.
402
+
403
+ Returns:
404
+ Random id string usable in sproc, table, and stage names.
405
+ """
406
+ return str(uuid4()).replace("-", "_").upper()
407
+
398
408
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
399
409
  """
400
410
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -473,7 +483,7 @@ class SGDClassifier(BaseTransformer):
473
483
  cp.dump(self._sklearn_object, local_transform_file)
474
484
 
475
485
  # Create temp stage to run fit.
476
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
486
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
477
487
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
478
488
  SqlResultValidator(
479
489
  session=session,
@@ -486,11 +496,12 @@ class SGDClassifier(BaseTransformer):
486
496
  expected_value=f"Stage area {transform_stage_name} successfully created."
487
497
  ).validate()
488
498
 
489
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
499
+ # Use posixpath to construct stage paths
500
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
501
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
490
502
  local_result_file_name = get_temp_file_path()
491
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
492
503
 
493
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
504
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
494
505
  statement_params = telemetry.get_function_usage_statement_params(
495
506
  project=_PROJECT,
496
507
  subproject=_SUBPROJECT,
@@ -516,6 +527,7 @@ class SGDClassifier(BaseTransformer):
516
527
  replace=True,
517
528
  session=session,
518
529
  statement_params=statement_params,
530
+ anonymous=True
519
531
  )
520
532
  def fit_wrapper_sproc(
521
533
  session: Session,
@@ -524,7 +536,8 @@ class SGDClassifier(BaseTransformer):
524
536
  stage_result_file_name: str,
525
537
  input_cols: List[str],
526
538
  label_cols: List[str],
527
- sample_weight_col: Optional[str]
539
+ sample_weight_col: Optional[str],
540
+ statement_params: Dict[str, str]
528
541
  ) -> str:
529
542
  import cloudpickle as cp
530
543
  import numpy as np
@@ -591,15 +604,15 @@ class SGDClassifier(BaseTransformer):
591
604
  api_calls=[Session.call],
592
605
  custom_tags=dict([("autogen", True)]),
593
606
  )
594
- sproc_export_file_name = session.call(
595
- fit_sproc_name,
607
+ sproc_export_file_name = fit_wrapper_sproc(
608
+ session,
596
609
  query,
597
610
  stage_transform_file_name,
598
611
  stage_result_file_name,
599
612
  identifier.get_unescaped_names(self.input_cols),
600
613
  identifier.get_unescaped_names(self.label_cols),
601
614
  identifier.get_unescaped_names(self.sample_weight_col),
602
- statement_params=statement_params,
615
+ statement_params,
603
616
  )
604
617
 
605
618
  if "|" in sproc_export_file_name:
@@ -609,7 +622,7 @@ class SGDClassifier(BaseTransformer):
609
622
  print("\n".join(fields[1:]))
610
623
 
611
624
  session.file.get(
612
- os.path.join(stage_result_file_name, sproc_export_file_name),
625
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
613
626
  local_result_file_name,
614
627
  statement_params=statement_params
615
628
  )
@@ -655,7 +668,7 @@ class SGDClassifier(BaseTransformer):
655
668
 
656
669
  # Register vectorized UDF for batch inference
657
670
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
658
- safe_id=self.id, method=inference_method)
671
+ safe_id=self._get_rand_id(), method=inference_method)
659
672
 
660
673
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
661
674
  # will try to pickle all of self which fails.
@@ -747,7 +760,7 @@ class SGDClassifier(BaseTransformer):
747
760
  return transformed_pandas_df.to_dict("records")
748
761
 
749
762
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
750
- safe_id=self.id
763
+ safe_id=self._get_rand_id()
751
764
  )
752
765
 
753
766
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -914,11 +927,18 @@ class SGDClassifier(BaseTransformer):
914
927
  Transformed dataset.
915
928
  """
916
929
  if isinstance(dataset, DataFrame):
930
+ expected_type_inferred = ""
931
+ # when it is classifier, infer the datatype from label columns
932
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
933
+ expected_type_inferred = convert_sp_to_sf_type(
934
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
935
+ )
936
+
917
937
  output_df = self._batch_inference(
918
938
  dataset=dataset,
919
939
  inference_method="predict",
920
940
  expected_output_cols_list=self.output_cols,
921
- expected_output_cols_type="",
941
+ expected_output_cols_type=expected_type_inferred,
922
942
  )
923
943
  elif isinstance(dataset, pd.DataFrame):
924
944
  output_df = self._sklearn_inference(
@@ -989,10 +1009,10 @@ class SGDClassifier(BaseTransformer):
989
1009
 
990
1010
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
991
1011
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
992
- Returns an empty list if current object is not a classifier or not yet fitted.
1012
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
993
1013
  """
994
1014
  if getattr(self._sklearn_object, "classes_", None) is None:
995
- return []
1015
+ return [output_cols_prefix]
996
1016
 
997
1017
  classes = self._sklearn_object.classes_
998
1018
  if isinstance(classes, numpy.ndarray):
@@ -1223,7 +1243,7 @@ class SGDClassifier(BaseTransformer):
1223
1243
  cp.dump(self._sklearn_object, local_score_file)
1224
1244
 
1225
1245
  # Create temp stage to run score.
1226
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1246
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1227
1247
  session = dataset._session
1228
1248
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1229
1249
  SqlResultValidator(
@@ -1237,8 +1257,9 @@ class SGDClassifier(BaseTransformer):
1237
1257
  expected_value=f"Stage area {score_stage_name} successfully created."
1238
1258
  ).validate()
1239
1259
 
1240
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1241
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1260
+ # Use posixpath to construct stage paths
1261
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1262
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1242
1263
  statement_params = telemetry.get_function_usage_statement_params(
1243
1264
  project=_PROJECT,
1244
1265
  subproject=_SUBPROJECT,
@@ -1264,6 +1285,7 @@ class SGDClassifier(BaseTransformer):
1264
1285
  replace=True,
1265
1286
  session=session,
1266
1287
  statement_params=statement_params,
1288
+ anonymous=True
1267
1289
  )
1268
1290
  def score_wrapper_sproc(
1269
1291
  session: Session,
@@ -1271,7 +1293,8 @@ class SGDClassifier(BaseTransformer):
1271
1293
  stage_score_file_name: str,
1272
1294
  input_cols: List[str],
1273
1295
  label_cols: List[str],
1274
- sample_weight_col: Optional[str]
1296
+ sample_weight_col: Optional[str],
1297
+ statement_params: Dict[str, str]
1275
1298
  ) -> float:
1276
1299
  import cloudpickle as cp
1277
1300
  import numpy as np
@@ -1321,14 +1344,14 @@ class SGDClassifier(BaseTransformer):
1321
1344
  api_calls=[Session.call],
1322
1345
  custom_tags=dict([("autogen", True)]),
1323
1346
  )
1324
- score = session.call(
1325
- score_sproc_name,
1347
+ score = score_wrapper_sproc(
1348
+ session,
1326
1349
  query,
1327
1350
  stage_score_file_name,
1328
1351
  identifier.get_unescaped_names(self.input_cols),
1329
1352
  identifier.get_unescaped_names(self.label_cols),
1330
1353
  identifier.get_unescaped_names(self.sample_weight_col),
1331
- statement_params=statement_params,
1354
+ statement_params,
1332
1355
  )
1333
1356
 
1334
1357
  cleanup_temp_files([local_score_file_name])
@@ -1346,18 +1369,20 @@ class SGDClassifier(BaseTransformer):
1346
1369
  if self._sklearn_object._estimator_type == 'classifier':
1347
1370
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1348
1371
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1349
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1372
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1373
+ ([] if self._drop_input_cols else inputs) + outputs)
1350
1374
  # For regressor, the type of predict is float64
1351
1375
  elif self._sklearn_object._estimator_type == 'regressor':
1352
1376
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1353
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1354
-
1377
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1378
+ ([] if self._drop_input_cols else inputs) + outputs)
1355
1379
  for prob_func in PROB_FUNCTIONS:
1356
1380
  if hasattr(self, prob_func):
1357
1381
  output_cols_prefix: str = f"{prob_func}_"
1358
1382
  output_column_names = self._get_output_column_names(output_cols_prefix)
1359
1383
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1360
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1384
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1385
+ ([] if self._drop_input_cols else inputs) + outputs)
1361
1386
 
1362
1387
  @property
1363
1388
  def model_signatures(self) -> Dict[str, ModelSignature]: