snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -238,7 +240,6 @@ class BayesianRidge(BaseTransformer):
238
240
  sample_weight_col: Optional[str] = None,
239
241
  ) -> None:
240
242
  super().__init__()
241
- self.id = str(uuid4()).replace("-", "_").upper()
242
243
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
243
244
 
244
245
  self._deps = list(deps)
@@ -269,6 +270,15 @@ class BayesianRidge(BaseTransformer):
269
270
  self.set_drop_input_cols(drop_input_cols)
270
271
  self.set_sample_weight_col(sample_weight_col)
271
272
 
273
+ def _get_rand_id(self) -> str:
274
+ """
275
+ Generate random id to be used in sproc and stage names.
276
+
277
+ Returns:
278
+ Random id string usable in sproc, table, and stage names.
279
+ """
280
+ return str(uuid4()).replace("-", "_").upper()
281
+
272
282
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
273
283
  """
274
284
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -347,7 +357,7 @@ class BayesianRidge(BaseTransformer):
347
357
  cp.dump(self._sklearn_object, local_transform_file)
348
358
 
349
359
  # Create temp stage to run fit.
350
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
360
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
351
361
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
352
362
  SqlResultValidator(
353
363
  session=session,
@@ -360,11 +370,12 @@ class BayesianRidge(BaseTransformer):
360
370
  expected_value=f"Stage area {transform_stage_name} successfully created."
361
371
  ).validate()
362
372
 
363
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
373
+ # Use posixpath to construct stage paths
374
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
375
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
364
376
  local_result_file_name = get_temp_file_path()
365
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
366
377
 
367
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
378
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
368
379
  statement_params = telemetry.get_function_usage_statement_params(
369
380
  project=_PROJECT,
370
381
  subproject=_SUBPROJECT,
@@ -390,6 +401,7 @@ class BayesianRidge(BaseTransformer):
390
401
  replace=True,
391
402
  session=session,
392
403
  statement_params=statement_params,
404
+ anonymous=True
393
405
  )
394
406
  def fit_wrapper_sproc(
395
407
  session: Session,
@@ -398,7 +410,8 @@ class BayesianRidge(BaseTransformer):
398
410
  stage_result_file_name: str,
399
411
  input_cols: List[str],
400
412
  label_cols: List[str],
401
- sample_weight_col: Optional[str]
413
+ sample_weight_col: Optional[str],
414
+ statement_params: Dict[str, str]
402
415
  ) -> str:
403
416
  import cloudpickle as cp
404
417
  import numpy as np
@@ -465,15 +478,15 @@ class BayesianRidge(BaseTransformer):
465
478
  api_calls=[Session.call],
466
479
  custom_tags=dict([("autogen", True)]),
467
480
  )
468
- sproc_export_file_name = session.call(
469
- fit_sproc_name,
481
+ sproc_export_file_name = fit_wrapper_sproc(
482
+ session,
470
483
  query,
471
484
  stage_transform_file_name,
472
485
  stage_result_file_name,
473
486
  identifier.get_unescaped_names(self.input_cols),
474
487
  identifier.get_unescaped_names(self.label_cols),
475
488
  identifier.get_unescaped_names(self.sample_weight_col),
476
- statement_params=statement_params,
489
+ statement_params,
477
490
  )
478
491
 
479
492
  if "|" in sproc_export_file_name:
@@ -483,7 +496,7 @@ class BayesianRidge(BaseTransformer):
483
496
  print("\n".join(fields[1:]))
484
497
 
485
498
  session.file.get(
486
- os.path.join(stage_result_file_name, sproc_export_file_name),
499
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
487
500
  local_result_file_name,
488
501
  statement_params=statement_params
489
502
  )
@@ -529,7 +542,7 @@ class BayesianRidge(BaseTransformer):
529
542
 
530
543
  # Register vectorized UDF for batch inference
531
544
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
532
- safe_id=self.id, method=inference_method)
545
+ safe_id=self._get_rand_id(), method=inference_method)
533
546
 
534
547
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
535
548
  # will try to pickle all of self which fails.
@@ -621,7 +634,7 @@ class BayesianRidge(BaseTransformer):
621
634
  return transformed_pandas_df.to_dict("records")
622
635
 
623
636
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
624
- safe_id=self.id
637
+ safe_id=self._get_rand_id()
625
638
  )
626
639
 
627
640
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -788,11 +801,18 @@ class BayesianRidge(BaseTransformer):
788
801
  Transformed dataset.
789
802
  """
790
803
  if isinstance(dataset, DataFrame):
804
+ expected_type_inferred = "float"
805
+ # when it is classifier, infer the datatype from label columns
806
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
807
+ expected_type_inferred = convert_sp_to_sf_type(
808
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
809
+ )
810
+
791
811
  output_df = self._batch_inference(
792
812
  dataset=dataset,
793
813
  inference_method="predict",
794
814
  expected_output_cols_list=self.output_cols,
795
- expected_output_cols_type="float",
815
+ expected_output_cols_type=expected_type_inferred,
796
816
  )
797
817
  elif isinstance(dataset, pd.DataFrame):
798
818
  output_df = self._sklearn_inference(
@@ -863,10 +883,10 @@ class BayesianRidge(BaseTransformer):
863
883
 
864
884
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
865
885
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
866
- Returns an empty list if current object is not a classifier or not yet fitted.
886
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
867
887
  """
868
888
  if getattr(self._sklearn_object, "classes_", None) is None:
869
- return []
889
+ return [output_cols_prefix]
870
890
 
871
891
  classes = self._sklearn_object.classes_
872
892
  if isinstance(classes, numpy.ndarray):
@@ -1091,7 +1111,7 @@ class BayesianRidge(BaseTransformer):
1091
1111
  cp.dump(self._sklearn_object, local_score_file)
1092
1112
 
1093
1113
  # Create temp stage to run score.
1094
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1114
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1095
1115
  session = dataset._session
1096
1116
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1097
1117
  SqlResultValidator(
@@ -1105,8 +1125,9 @@ class BayesianRidge(BaseTransformer):
1105
1125
  expected_value=f"Stage area {score_stage_name} successfully created."
1106
1126
  ).validate()
1107
1127
 
1108
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1109
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1128
+ # Use posixpath to construct stage paths
1129
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1130
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1110
1131
  statement_params = telemetry.get_function_usage_statement_params(
1111
1132
  project=_PROJECT,
1112
1133
  subproject=_SUBPROJECT,
@@ -1132,6 +1153,7 @@ class BayesianRidge(BaseTransformer):
1132
1153
  replace=True,
1133
1154
  session=session,
1134
1155
  statement_params=statement_params,
1156
+ anonymous=True
1135
1157
  )
1136
1158
  def score_wrapper_sproc(
1137
1159
  session: Session,
@@ -1139,7 +1161,8 @@ class BayesianRidge(BaseTransformer):
1139
1161
  stage_score_file_name: str,
1140
1162
  input_cols: List[str],
1141
1163
  label_cols: List[str],
1142
- sample_weight_col: Optional[str]
1164
+ sample_weight_col: Optional[str],
1165
+ statement_params: Dict[str, str]
1143
1166
  ) -> float:
1144
1167
  import cloudpickle as cp
1145
1168
  import numpy as np
@@ -1189,14 +1212,14 @@ class BayesianRidge(BaseTransformer):
1189
1212
  api_calls=[Session.call],
1190
1213
  custom_tags=dict([("autogen", True)]),
1191
1214
  )
1192
- score = session.call(
1193
- score_sproc_name,
1215
+ score = score_wrapper_sproc(
1216
+ session,
1194
1217
  query,
1195
1218
  stage_score_file_name,
1196
1219
  identifier.get_unescaped_names(self.input_cols),
1197
1220
  identifier.get_unescaped_names(self.label_cols),
1198
1221
  identifier.get_unescaped_names(self.sample_weight_col),
1199
- statement_params=statement_params,
1222
+ statement_params,
1200
1223
  )
1201
1224
 
1202
1225
  cleanup_temp_files([local_score_file_name])
@@ -1214,18 +1237,20 @@ class BayesianRidge(BaseTransformer):
1214
1237
  if self._sklearn_object._estimator_type == 'classifier':
1215
1238
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1216
1239
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1217
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1240
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1241
+ ([] if self._drop_input_cols else inputs) + outputs)
1218
1242
  # For regressor, the type of predict is float64
1219
1243
  elif self._sklearn_object._estimator_type == 'regressor':
1220
1244
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1221
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1222
-
1245
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1246
+ ([] if self._drop_input_cols else inputs) + outputs)
1223
1247
  for prob_func in PROB_FUNCTIONS:
1224
1248
  if hasattr(self, prob_func):
1225
1249
  output_cols_prefix: str = f"{prob_func}_"
1226
1250
  output_column_names = self._get_output_column_names(output_cols_prefix)
1227
1251
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1228
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1252
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1253
+ ([] if self._drop_input_cols else inputs) + outputs)
1229
1254
 
1230
1255
  @property
1231
1256
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -245,7 +247,6 @@ class ElasticNet(BaseTransformer):
245
247
  sample_weight_col: Optional[str] = None,
246
248
  ) -> None:
247
249
  super().__init__()
248
- self.id = str(uuid4()).replace("-", "_").upper()
249
250
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
250
251
 
251
252
  self._deps = list(deps)
@@ -275,6 +276,15 @@ class ElasticNet(BaseTransformer):
275
276
  self.set_drop_input_cols(drop_input_cols)
276
277
  self.set_sample_weight_col(sample_weight_col)
277
278
 
279
+ def _get_rand_id(self) -> str:
280
+ """
281
+ Generate random id to be used in sproc and stage names.
282
+
283
+ Returns:
284
+ Random id string usable in sproc, table, and stage names.
285
+ """
286
+ return str(uuid4()).replace("-", "_").upper()
287
+
278
288
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
279
289
  """
280
290
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -353,7 +363,7 @@ class ElasticNet(BaseTransformer):
353
363
  cp.dump(self._sklearn_object, local_transform_file)
354
364
 
355
365
  # Create temp stage to run fit.
356
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
366
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
357
367
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
358
368
  SqlResultValidator(
359
369
  session=session,
@@ -366,11 +376,12 @@ class ElasticNet(BaseTransformer):
366
376
  expected_value=f"Stage area {transform_stage_name} successfully created."
367
377
  ).validate()
368
378
 
369
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
379
+ # Use posixpath to construct stage paths
380
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
381
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
370
382
  local_result_file_name = get_temp_file_path()
371
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
372
383
 
373
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
384
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
374
385
  statement_params = telemetry.get_function_usage_statement_params(
375
386
  project=_PROJECT,
376
387
  subproject=_SUBPROJECT,
@@ -396,6 +407,7 @@ class ElasticNet(BaseTransformer):
396
407
  replace=True,
397
408
  session=session,
398
409
  statement_params=statement_params,
410
+ anonymous=True
399
411
  )
400
412
  def fit_wrapper_sproc(
401
413
  session: Session,
@@ -404,7 +416,8 @@ class ElasticNet(BaseTransformer):
404
416
  stage_result_file_name: str,
405
417
  input_cols: List[str],
406
418
  label_cols: List[str],
407
- sample_weight_col: Optional[str]
419
+ sample_weight_col: Optional[str],
420
+ statement_params: Dict[str, str]
408
421
  ) -> str:
409
422
  import cloudpickle as cp
410
423
  import numpy as np
@@ -471,15 +484,15 @@ class ElasticNet(BaseTransformer):
471
484
  api_calls=[Session.call],
472
485
  custom_tags=dict([("autogen", True)]),
473
486
  )
474
- sproc_export_file_name = session.call(
475
- fit_sproc_name,
487
+ sproc_export_file_name = fit_wrapper_sproc(
488
+ session,
476
489
  query,
477
490
  stage_transform_file_name,
478
491
  stage_result_file_name,
479
492
  identifier.get_unescaped_names(self.input_cols),
480
493
  identifier.get_unescaped_names(self.label_cols),
481
494
  identifier.get_unescaped_names(self.sample_weight_col),
482
- statement_params=statement_params,
495
+ statement_params,
483
496
  )
484
497
 
485
498
  if "|" in sproc_export_file_name:
@@ -489,7 +502,7 @@ class ElasticNet(BaseTransformer):
489
502
  print("\n".join(fields[1:]))
490
503
 
491
504
  session.file.get(
492
- os.path.join(stage_result_file_name, sproc_export_file_name),
505
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
493
506
  local_result_file_name,
494
507
  statement_params=statement_params
495
508
  )
@@ -535,7 +548,7 @@ class ElasticNet(BaseTransformer):
535
548
 
536
549
  # Register vectorized UDF for batch inference
537
550
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
538
- safe_id=self.id, method=inference_method)
551
+ safe_id=self._get_rand_id(), method=inference_method)
539
552
 
540
553
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
541
554
  # will try to pickle all of self which fails.
@@ -627,7 +640,7 @@ class ElasticNet(BaseTransformer):
627
640
  return transformed_pandas_df.to_dict("records")
628
641
 
629
642
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
630
- safe_id=self.id
643
+ safe_id=self._get_rand_id()
631
644
  )
632
645
 
633
646
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -794,11 +807,18 @@ class ElasticNet(BaseTransformer):
794
807
  Transformed dataset.
795
808
  """
796
809
  if isinstance(dataset, DataFrame):
810
+ expected_type_inferred = "float"
811
+ # when it is classifier, infer the datatype from label columns
812
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
813
+ expected_type_inferred = convert_sp_to_sf_type(
814
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
815
+ )
816
+
797
817
  output_df = self._batch_inference(
798
818
  dataset=dataset,
799
819
  inference_method="predict",
800
820
  expected_output_cols_list=self.output_cols,
801
- expected_output_cols_type="float",
821
+ expected_output_cols_type=expected_type_inferred,
802
822
  )
803
823
  elif isinstance(dataset, pd.DataFrame):
804
824
  output_df = self._sklearn_inference(
@@ -869,10 +889,10 @@ class ElasticNet(BaseTransformer):
869
889
 
870
890
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
871
891
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
872
- Returns an empty list if current object is not a classifier or not yet fitted.
892
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
873
893
  """
874
894
  if getattr(self._sklearn_object, "classes_", None) is None:
875
- return []
895
+ return [output_cols_prefix]
876
896
 
877
897
  classes = self._sklearn_object.classes_
878
898
  if isinstance(classes, numpy.ndarray):
@@ -1097,7 +1117,7 @@ class ElasticNet(BaseTransformer):
1097
1117
  cp.dump(self._sklearn_object, local_score_file)
1098
1118
 
1099
1119
  # Create temp stage to run score.
1100
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1120
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1101
1121
  session = dataset._session
1102
1122
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1103
1123
  SqlResultValidator(
@@ -1111,8 +1131,9 @@ class ElasticNet(BaseTransformer):
1111
1131
  expected_value=f"Stage area {score_stage_name} successfully created."
1112
1132
  ).validate()
1113
1133
 
1114
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1115
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1134
+ # Use posixpath to construct stage paths
1135
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1136
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1116
1137
  statement_params = telemetry.get_function_usage_statement_params(
1117
1138
  project=_PROJECT,
1118
1139
  subproject=_SUBPROJECT,
@@ -1138,6 +1159,7 @@ class ElasticNet(BaseTransformer):
1138
1159
  replace=True,
1139
1160
  session=session,
1140
1161
  statement_params=statement_params,
1162
+ anonymous=True
1141
1163
  )
1142
1164
  def score_wrapper_sproc(
1143
1165
  session: Session,
@@ -1145,7 +1167,8 @@ class ElasticNet(BaseTransformer):
1145
1167
  stage_score_file_name: str,
1146
1168
  input_cols: List[str],
1147
1169
  label_cols: List[str],
1148
- sample_weight_col: Optional[str]
1170
+ sample_weight_col: Optional[str],
1171
+ statement_params: Dict[str, str]
1149
1172
  ) -> float:
1150
1173
  import cloudpickle as cp
1151
1174
  import numpy as np
@@ -1195,14 +1218,14 @@ class ElasticNet(BaseTransformer):
1195
1218
  api_calls=[Session.call],
1196
1219
  custom_tags=dict([("autogen", True)]),
1197
1220
  )
1198
- score = session.call(
1199
- score_sproc_name,
1221
+ score = score_wrapper_sproc(
1222
+ session,
1200
1223
  query,
1201
1224
  stage_score_file_name,
1202
1225
  identifier.get_unescaped_names(self.input_cols),
1203
1226
  identifier.get_unescaped_names(self.label_cols),
1204
1227
  identifier.get_unescaped_names(self.sample_weight_col),
1205
- statement_params=statement_params,
1228
+ statement_params,
1206
1229
  )
1207
1230
 
1208
1231
  cleanup_temp_files([local_score_file_name])
@@ -1220,18 +1243,20 @@ class ElasticNet(BaseTransformer):
1220
1243
  if self._sklearn_object._estimator_type == 'classifier':
1221
1244
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1222
1245
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1223
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1246
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1247
+ ([] if self._drop_input_cols else inputs) + outputs)
1224
1248
  # For regressor, the type of predict is float64
1225
1249
  elif self._sklearn_object._estimator_type == 'regressor':
1226
1250
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1227
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1228
-
1251
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1252
+ ([] if self._drop_input_cols else inputs) + outputs)
1229
1253
  for prob_func in PROB_FUNCTIONS:
1230
1254
  if hasattr(self, prob_func):
1231
1255
  output_cols_prefix: str = f"{prob_func}_"
1232
1256
  output_column_names = self._get_output_column_names(output_cols_prefix)
1233
1257
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1234
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1258
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1259
+ ([] if self._drop_input_cols else inputs) + outputs)
1235
1260
 
1236
1261
  @property
1237
1262
  def model_signatures(self) -> Dict[str, ModelSignature]: