snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/linear_model/ridge_classifier.py
@@ -7,6 +7,7 @@
  #
  import inspect
  import os
+ import posixpath
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
  from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
  from snowflake.snowpark import DataFrame, Session
  from snowflake.snowpark.functions import pandas_udf, sproc
  from snowflake.snowpark.types import PandasSeries
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

  from snowflake.ml.model.model_signature import (
  DataType,
@@ -261,7 +263,6 @@ class RidgeClassifier(BaseTransformer):
  sample_weight_col: Optional[str] = None,
  ) -> None:
  super().__init__()
- self.id = str(uuid4()).replace("-", "_").upper()
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

  self._deps = list(deps)
@@ -289,6 +290,15 @@ class RidgeClassifier(BaseTransformer):
  self.set_drop_input_cols(drop_input_cols)
  self.set_sample_weight_col(sample_weight_col)

+ def _get_rand_id(self) -> str:
+ """
+ Generate random id to be used in sproc and stage names.
+
+ Returns:
+ Random id string usable in sproc, table, and stage names.
+ """
+ return str(uuid4()).replace("-", "_").upper()
+
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
  """
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
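The change above replaces the per-instance `self.id` assigned in `__init__` with a `_get_rand_id()` helper invoked at each use site, so every temporary stage, sproc, and UDF created below receives a fresh random suffix rather than sharing one per estimator. A minimal standalone sketch of the id transformation (the function name is illustrative, not part of the package):

    import uuid

    def make_identifier_safe_id() -> str:
        # uuid4 strings contain "-", which is not allowed in unquoted Snowflake
        # identifiers; replacing it with "_" and upper-casing yields a token that
        # can be embedded in stage, table, and sproc names.
        return str(uuid.uuid4()).replace("-", "_").upper()

    print("SNOWML_TRANSFORM_{safe_id}".format(safe_id=make_identifier_safe_id()))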
@@ -367,7 +377,7 @@ class RidgeClassifier(BaseTransformer):
  cp.dump(self._sklearn_object, local_transform_file)

  # Create temp stage to run fit.
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
  SqlResultValidator(
  session=session,
@@ -380,11 +390,12 @@ class RidgeClassifier(BaseTransformer):
  expected_value=f"Stage area {transform_stage_name} successfully created."
  ).validate()

- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ # Use posixpath to construct stage paths
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
  local_result_file_name = get_temp_file_path()
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
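Stage file paths are now built with `posixpath.join` instead of `os.path.join`. A short standard-library sketch of the difference this makes on a Windows client, where `os.path.join` behaves like `ntpath.join` and would produce a backslash that Snowflake stage paths do not use (names below are illustrative):

    import ntpath      # os.path implementation used on Windows clients
    import posixpath   # always joins with "/", regardless of the client OS

    stage_name = "SNOWML_TRANSFORM_ABC123"   # illustrative stage name
    file_name = "model.pkl.zip"              # illustrative file name

    print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl.zip
    print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl.zip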
@@ -410,6 +421,7 @@ class RidgeClassifier(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def fit_wrapper_sproc(
  session: Session,
@@ -418,7 +430,8 @@ class RidgeClassifier(BaseTransformer):
  stage_result_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> str:
  import cloudpickle as cp
  import numpy as np
@@ -485,15 +498,15 @@ class RidgeClassifier(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- sproc_export_file_name = session.call(
- fit_sproc_name,
+ sproc_export_file_name = fit_wrapper_sproc(
+ session,
  query,
  stage_transform_file_name,
  stage_result_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )

  if "|" in sproc_export_file_name:
@@ -503,7 +516,7 @@ class RidgeClassifier(BaseTransformer):
  print("\n".join(fields[1:]))

  session.file.get(
- os.path.join(stage_result_file_name, sproc_export_file_name),
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
  local_result_file_name,
  statement_params=statement_params
  )
@@ -549,7 +562,7 @@ class RidgeClassifier(BaseTransformer):

  # Register vectorized UDF for batch inference
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
- safe_id=self.id, method=inference_method)
+ safe_id=self._get_rand_id(), method=inference_method)

  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
  # will try to pickle all of self which fails.
@@ -641,7 +654,7 @@ class RidgeClassifier(BaseTransformer):
  return transformed_pandas_df.to_dict("records")

  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
- safe_id=self.id
+ safe_id=self._get_rand_id()
  )

  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -808,11 +821,18 @@ class RidgeClassifier(BaseTransformer):
  Transformed dataset.
  """
  if isinstance(dataset, DataFrame):
+ expected_type_inferred = ""
+ # when it is classifier, infer the datatype from label columns
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
+ expected_type_inferred = convert_sp_to_sf_type(
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
+ )
+
  output_df = self._batch_inference(
  dataset=dataset,
  inference_method="predict",
  expected_output_cols_list=self.output_cols,
- expected_output_cols_type="",
+ expected_output_cols_type=expected_type_inferred,
  )
  elif isinstance(dataset, pd.DataFrame):
  output_df = self._sklearn_inference(
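For Snowpark DataFrame input, `predict` now passes an inferred Snowflake SQL type string for the output column instead of an empty string, derived from the signature recorded at fit time. A rough sketch of that lookup, assuming the `model_signature` classes imported elsewhere in this diff; the signature values are stand-ins, and the exact SQL type string returned depends on the installed Snowpark version:

    from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature
    from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

    # Stand-in signature: one feature column in, one integer-typed label column out.
    sig = ModelSignature(
        inputs=[FeatureSpec(dtype=DataType.DOUBLE, name="FEATURE_0")],
        outputs=[FeatureSpec(dtype=DataType.INT64, name="TARGET")],
    )

    # as_snowpark_type() yields a Snowpark DataType; convert_sp_to_sf_type maps it
    # to the SQL type string passed to _batch_inference as expected_output_cols_type.
    print(convert_sp_to_sf_type(sig.outputs[0].as_snowpark_type()))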
@@ -883,10 +903,10 @@ class RidgeClassifier(BaseTransformer):

  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns an empty list if current object is not a classifier or not yet fitted.
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
  """
  if getattr(self._sklearn_object, "classes_", None) is None:
- return []
+ return [output_cols_prefix]

  classes = self._sklearn_object.classes_
  if isinstance(classes, numpy.ndarray):
@@ -1113,7 +1133,7 @@ class RidgeClassifier(BaseTransformer):
  cp.dump(self._sklearn_object, local_score_file)

  # Create temp stage to run score.
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  session = dataset._session
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
  SqlResultValidator(
@@ -1127,8 +1147,9 @@ class RidgeClassifier(BaseTransformer):
  expected_value=f"Stage area {score_stage_name} successfully created."
  ).validate()

- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ # Use posixpath to construct stage paths
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -1154,6 +1175,7 @@ class RidgeClassifier(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def score_wrapper_sproc(
  session: Session,
@@ -1161,7 +1183,8 @@ class RidgeClassifier(BaseTransformer):
  stage_score_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> float:
  import cloudpickle as cp
  import numpy as np
@@ -1211,14 +1234,14 @@ class RidgeClassifier(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- score = session.call(
- score_sproc_name,
+ score = score_wrapper_sproc(
+ session,
  query,
  stage_score_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )

  cleanup_temp_files([local_score_file_name])
@@ -1236,18 +1259,20 @@ class RidgeClassifier(BaseTransformer):
  if self._sklearn_object._estimator_type == 'classifier':
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  # For regressor, the type of predict is float64
  elif self._sklearn_object._estimator_type == 'regressor':
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  for prob_func in PROB_FUNCTIONS:
  if hasattr(self, prob_func):
  output_cols_prefix: str = f"{prob_func}_"
  output_column_names = self._get_output_column_names(output_cols_prefix)
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)

  @property
  def model_signatures(self) -> Dict[str, ModelSignature]:
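The recorded signatures now include the input columns on the output side whenever `drop_input_cols` is not set, presumably to match the pass-through columns returned by batch inference. A tiny illustration of the `([] if self._drop_input_cols else inputs) + outputs` expression with stand-in column lists:

    inputs = ["SEPAL_LENGTH", "SEPAL_WIDTH"]   # stand-ins for input FeatureSpecs
    outputs = ["PREDICTED_TARGET"]             # stand-in for the output FeatureSpec

    for drop_input_cols in (False, True):
        print(drop_input_cols, ([] if drop_input_cols else inputs) + outputs)
    # False -> ['SEPAL_LENGTH', 'SEPAL_WIDTH', 'PREDICTED_TARGET']
    # True  -> ['PREDICTED_TARGET']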
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py
@@ -7,6 +7,7 @@
  #
  import inspect
  import os
+ import posixpath
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
  from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
  from snowflake.snowpark import DataFrame, Session
  from snowflake.snowpark.functions import pandas_udf, sproc
  from snowflake.snowpark.types import PandasSeries
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

  from snowflake.ml.model.model_signature import (
  DataType,
@@ -230,7 +232,6 @@ class RidgeClassifierCV(BaseTransformer):
  sample_weight_col: Optional[str] = None,
  ) -> None:
  super().__init__()
- self.id = str(uuid4()).replace("-", "_").upper()
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])

  self._deps = list(deps)
@@ -255,6 +256,15 @@ class RidgeClassifierCV(BaseTransformer):
  self.set_drop_input_cols(drop_input_cols)
  self.set_sample_weight_col(sample_weight_col)

+ def _get_rand_id(self) -> str:
+ """
+ Generate random id to be used in sproc and stage names.
+
+ Returns:
+ Random id string usable in sproc, table, and stage names.
+ """
+ return str(uuid4()).replace("-", "_").upper()
+
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
  """
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -333,7 +343,7 @@ class RidgeClassifierCV(BaseTransformer):
  cp.dump(self._sklearn_object, local_transform_file)

  # Create temp stage to run fit.
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
  SqlResultValidator(
  session=session,
@@ -346,11 +356,12 @@ class RidgeClassifierCV(BaseTransformer):
  expected_value=f"Stage area {transform_stage_name} successfully created."
  ).validate()

- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ # Use posixpath to construct stage paths
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
  local_result_file_name = get_temp_file_path()
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -376,6 +387,7 @@ class RidgeClassifierCV(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def fit_wrapper_sproc(
  session: Session,
@@ -384,7 +396,8 @@ class RidgeClassifierCV(BaseTransformer):
  stage_result_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> str:
  import cloudpickle as cp
  import numpy as np
@@ -451,15 +464,15 @@ class RidgeClassifierCV(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- sproc_export_file_name = session.call(
- fit_sproc_name,
+ sproc_export_file_name = fit_wrapper_sproc(
+ session,
  query,
  stage_transform_file_name,
  stage_result_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )

  if "|" in sproc_export_file_name:
@@ -469,7 +482,7 @@ class RidgeClassifierCV(BaseTransformer):
  print("\n".join(fields[1:]))

  session.file.get(
- os.path.join(stage_result_file_name, sproc_export_file_name),
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
  local_result_file_name,
  statement_params=statement_params
  )
@@ -515,7 +528,7 @@ class RidgeClassifierCV(BaseTransformer):

  # Register vectorized UDF for batch inference
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
- safe_id=self.id, method=inference_method)
+ safe_id=self._get_rand_id(), method=inference_method)

  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
  # will try to pickle all of self which fails.
@@ -607,7 +620,7 @@ class RidgeClassifierCV(BaseTransformer):
  return transformed_pandas_df.to_dict("records")

  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
- safe_id=self.id
+ safe_id=self._get_rand_id()
  )

  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -774,11 +787,18 @@ class RidgeClassifierCV(BaseTransformer):
  Transformed dataset.
  """
  if isinstance(dataset, DataFrame):
+ expected_type_inferred = ""
+ # when it is classifier, infer the datatype from label columns
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
+ expected_type_inferred = convert_sp_to_sf_type(
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
+ )
+
  output_df = self._batch_inference(
  dataset=dataset,
  inference_method="predict",
  expected_output_cols_list=self.output_cols,
- expected_output_cols_type="",
+ expected_output_cols_type=expected_type_inferred,
  )
  elif isinstance(dataset, pd.DataFrame):
  output_df = self._sklearn_inference(
@@ -849,10 +869,10 @@ class RidgeClassifierCV(BaseTransformer):

  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
- Returns an empty list if current object is not a classifier or not yet fitted.
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
  """
  if getattr(self._sklearn_object, "classes_", None) is None:
- return []
+ return [output_cols_prefix]

  classes = self._sklearn_object.classes_
  if isinstance(classes, numpy.ndarray):
@@ -1079,7 +1099,7 @@ class RidgeClassifierCV(BaseTransformer):
  cp.dump(self._sklearn_object, local_score_file)

  # Create temp stage to run score.
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  session = dataset._session
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
  SqlResultValidator(
@@ -1093,8 +1113,9 @@ class RidgeClassifierCV(BaseTransformer):
  expected_value=f"Stage area {score_stage_name} successfully created."
  ).validate()

- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+ # Use posixpath to construct stage paths
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
  statement_params = telemetry.get_function_usage_statement_params(
  project=_PROJECT,
  subproject=_SUBPROJECT,
@@ -1120,6 +1141,7 @@ class RidgeClassifierCV(BaseTransformer):
  replace=True,
  session=session,
  statement_params=statement_params,
+ anonymous=True
  )
  def score_wrapper_sproc(
  session: Session,
@@ -1127,7 +1149,8 @@ class RidgeClassifierCV(BaseTransformer):
  stage_score_file_name: str,
  input_cols: List[str],
  label_cols: List[str],
- sample_weight_col: Optional[str]
+ sample_weight_col: Optional[str],
+ statement_params: Dict[str, str]
  ) -> float:
  import cloudpickle as cp
  import numpy as np
@@ -1177,14 +1200,14 @@ class RidgeClassifierCV(BaseTransformer):
  api_calls=[Session.call],
  custom_tags=dict([("autogen", True)]),
  )
- score = session.call(
- score_sproc_name,
+ score = score_wrapper_sproc(
+ session,
  query,
  stage_score_file_name,
  identifier.get_unescaped_names(self.input_cols),
  identifier.get_unescaped_names(self.label_cols),
  identifier.get_unescaped_names(self.sample_weight_col),
- statement_params=statement_params,
+ statement_params,
  )

  cleanup_temp_files([local_score_file_name])
@@ -1202,18 +1225,20 @@ class RidgeClassifierCV(BaseTransformer):
  if self._sklearn_object._estimator_type == 'classifier':
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  # For regressor, the type of predict is float64
  elif self._sklearn_object._estimator_type == 'regressor':
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)
  for prob_func in PROB_FUNCTIONS:
  if hasattr(self, prob_func):
  output_cols_prefix: str = f"{prob_func}_"
  output_column_names = self._get_output_column_names(output_cols_prefix)
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
+ ([] if self._drop_input_cols else inputs) + outputs)

  @property
  def model_signatures(self) -> Dict[str, ModelSignature]: