snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -214,7 +216,6 @@ class HuberRegressor(BaseTransformer):
214
216
  sample_weight_col: Optional[str] = None,
215
217
  ) -> None:
216
218
  super().__init__()
217
- self.id = str(uuid4()).replace("-", "_").upper()
218
219
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
219
220
 
220
221
  self._deps = list(deps)
@@ -239,6 +240,15 @@ class HuberRegressor(BaseTransformer):
239
240
  self.set_drop_input_cols(drop_input_cols)
240
241
  self.set_sample_weight_col(sample_weight_col)
241
242
 
243
+ def _get_rand_id(self) -> str:
244
+ """
245
+ Generate random id to be used in sproc and stage names.
246
+
247
+ Returns:
248
+ Random id string usable in sproc, table, and stage names.
249
+ """
250
+ return str(uuid4()).replace("-", "_").upper()
251
+
242
252
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
243
253
  """
244
254
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -317,7 +327,7 @@ class HuberRegressor(BaseTransformer):
317
327
  cp.dump(self._sklearn_object, local_transform_file)
318
328
 
319
329
  # Create temp stage to run fit.
320
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
330
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
321
331
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
322
332
  SqlResultValidator(
323
333
  session=session,
@@ -330,11 +340,12 @@ class HuberRegressor(BaseTransformer):
330
340
  expected_value=f"Stage area {transform_stage_name} successfully created."
331
341
  ).validate()
332
342
 
333
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
343
+ # Use posixpath to construct stage paths
344
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
345
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
334
346
  local_result_file_name = get_temp_file_path()
335
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
336
347
 
337
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
348
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
338
349
  statement_params = telemetry.get_function_usage_statement_params(
339
350
  project=_PROJECT,
340
351
  subproject=_SUBPROJECT,
@@ -360,6 +371,7 @@ class HuberRegressor(BaseTransformer):
360
371
  replace=True,
361
372
  session=session,
362
373
  statement_params=statement_params,
374
+ anonymous=True
363
375
  )
364
376
  def fit_wrapper_sproc(
365
377
  session: Session,
@@ -368,7 +380,8 @@ class HuberRegressor(BaseTransformer):
368
380
  stage_result_file_name: str,
369
381
  input_cols: List[str],
370
382
  label_cols: List[str],
371
- sample_weight_col: Optional[str]
383
+ sample_weight_col: Optional[str],
384
+ statement_params: Dict[str, str]
372
385
  ) -> str:
373
386
  import cloudpickle as cp
374
387
  import numpy as np
@@ -435,15 +448,15 @@ class HuberRegressor(BaseTransformer):
435
448
  api_calls=[Session.call],
436
449
  custom_tags=dict([("autogen", True)]),
437
450
  )
438
- sproc_export_file_name = session.call(
439
- fit_sproc_name,
451
+ sproc_export_file_name = fit_wrapper_sproc(
452
+ session,
440
453
  query,
441
454
  stage_transform_file_name,
442
455
  stage_result_file_name,
443
456
  identifier.get_unescaped_names(self.input_cols),
444
457
  identifier.get_unescaped_names(self.label_cols),
445
458
  identifier.get_unescaped_names(self.sample_weight_col),
446
- statement_params=statement_params,
459
+ statement_params,
447
460
  )
448
461
 
449
462
  if "|" in sproc_export_file_name:
@@ -453,7 +466,7 @@ class HuberRegressor(BaseTransformer):
453
466
  print("\n".join(fields[1:]))
454
467
 
455
468
  session.file.get(
456
- os.path.join(stage_result_file_name, sproc_export_file_name),
469
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
457
470
  local_result_file_name,
458
471
  statement_params=statement_params
459
472
  )
@@ -499,7 +512,7 @@ class HuberRegressor(BaseTransformer):
499
512
 
500
513
  # Register vectorized UDF for batch inference
501
514
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
502
- safe_id=self.id, method=inference_method)
515
+ safe_id=self._get_rand_id(), method=inference_method)
503
516
 
504
517
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
505
518
  # will try to pickle all of self which fails.
@@ -591,7 +604,7 @@ class HuberRegressor(BaseTransformer):
591
604
  return transformed_pandas_df.to_dict("records")
592
605
 
593
606
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
594
- safe_id=self.id
607
+ safe_id=self._get_rand_id()
595
608
  )
596
609
 
597
610
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -758,11 +771,18 @@ class HuberRegressor(BaseTransformer):
758
771
  Transformed dataset.
759
772
  """
760
773
  if isinstance(dataset, DataFrame):
774
+ expected_type_inferred = "float"
775
+ # when it is classifier, infer the datatype from label columns
776
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
777
+ expected_type_inferred = convert_sp_to_sf_type(
778
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
779
+ )
780
+
761
781
  output_df = self._batch_inference(
762
782
  dataset=dataset,
763
783
  inference_method="predict",
764
784
  expected_output_cols_list=self.output_cols,
765
- expected_output_cols_type="float",
785
+ expected_output_cols_type=expected_type_inferred,
766
786
  )
767
787
  elif isinstance(dataset, pd.DataFrame):
768
788
  output_df = self._sklearn_inference(
@@ -833,10 +853,10 @@ class HuberRegressor(BaseTransformer):
833
853
 
834
854
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
835
855
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
836
- Returns an empty list if current object is not a classifier or not yet fitted.
856
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
837
857
  """
838
858
  if getattr(self._sklearn_object, "classes_", None) is None:
839
- return []
859
+ return [output_cols_prefix]
840
860
 
841
861
  classes = self._sklearn_object.classes_
842
862
  if isinstance(classes, numpy.ndarray):
@@ -1061,7 +1081,7 @@ class HuberRegressor(BaseTransformer):
1061
1081
  cp.dump(self._sklearn_object, local_score_file)
1062
1082
 
1063
1083
  # Create temp stage to run score.
1064
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1084
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1065
1085
  session = dataset._session
1066
1086
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1067
1087
  SqlResultValidator(
@@ -1075,8 +1095,9 @@ class HuberRegressor(BaseTransformer):
1075
1095
  expected_value=f"Stage area {score_stage_name} successfully created."
1076
1096
  ).validate()
1077
1097
 
1078
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1079
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1098
+ # Use posixpath to construct stage paths
1099
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1100
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1080
1101
  statement_params = telemetry.get_function_usage_statement_params(
1081
1102
  project=_PROJECT,
1082
1103
  subproject=_SUBPROJECT,
@@ -1102,6 +1123,7 @@ class HuberRegressor(BaseTransformer):
1102
1123
  replace=True,
1103
1124
  session=session,
1104
1125
  statement_params=statement_params,
1126
+ anonymous=True
1105
1127
  )
1106
1128
  def score_wrapper_sproc(
1107
1129
  session: Session,
@@ -1109,7 +1131,8 @@ class HuberRegressor(BaseTransformer):
1109
1131
  stage_score_file_name: str,
1110
1132
  input_cols: List[str],
1111
1133
  label_cols: List[str],
1112
- sample_weight_col: Optional[str]
1134
+ sample_weight_col: Optional[str],
1135
+ statement_params: Dict[str, str]
1113
1136
  ) -> float:
1114
1137
  import cloudpickle as cp
1115
1138
  import numpy as np
@@ -1159,14 +1182,14 @@ class HuberRegressor(BaseTransformer):
1159
1182
  api_calls=[Session.call],
1160
1183
  custom_tags=dict([("autogen", True)]),
1161
1184
  )
1162
- score = session.call(
1163
- score_sproc_name,
1185
+ score = score_wrapper_sproc(
1186
+ session,
1164
1187
  query,
1165
1188
  stage_score_file_name,
1166
1189
  identifier.get_unescaped_names(self.input_cols),
1167
1190
  identifier.get_unescaped_names(self.label_cols),
1168
1191
  identifier.get_unescaped_names(self.sample_weight_col),
1169
- statement_params=statement_params,
1192
+ statement_params,
1170
1193
  )
1171
1194
 
1172
1195
  cleanup_temp_files([local_score_file_name])
@@ -1184,18 +1207,20 @@ class HuberRegressor(BaseTransformer):
1184
1207
  if self._sklearn_object._estimator_type == 'classifier':
1185
1208
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1186
1209
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1187
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1210
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1211
+ ([] if self._drop_input_cols else inputs) + outputs)
1188
1212
  # For regressor, the type of predict is float64
1189
1213
  elif self._sklearn_object._estimator_type == 'regressor':
1190
1214
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1191
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1192
-
1215
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1216
+ ([] if self._drop_input_cols else inputs) + outputs)
1193
1217
  for prob_func in PROB_FUNCTIONS:
1194
1218
  if hasattr(self, prob_func):
1195
1219
  output_cols_prefix: str = f"{prob_func}_"
1196
1220
  output_column_names = self._get_output_column_names(output_cols_prefix)
1197
1221
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1198
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1222
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1223
+ ([] if self._drop_input_cols else inputs) + outputs)
1199
1224
 
1200
1225
  @property
1201
1226
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -239,7 +241,6 @@ class Lars(BaseTransformer):
239
241
  sample_weight_col: Optional[str] = None,
240
242
  ) -> None:
241
243
  super().__init__()
242
- self.id = str(uuid4()).replace("-", "_").upper()
243
244
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
244
245
 
245
246
  self._deps = list(deps)
@@ -268,6 +269,15 @@ class Lars(BaseTransformer):
268
269
  self.set_drop_input_cols(drop_input_cols)
269
270
  self.set_sample_weight_col(sample_weight_col)
270
271
 
272
+ def _get_rand_id(self) -> str:
273
+ """
274
+ Generate random id to be used in sproc and stage names.
275
+
276
+ Returns:
277
+ Random id string usable in sproc, table, and stage names.
278
+ """
279
+ return str(uuid4()).replace("-", "_").upper()
280
+
271
281
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
272
282
  """
273
283
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -346,7 +356,7 @@ class Lars(BaseTransformer):
346
356
  cp.dump(self._sklearn_object, local_transform_file)
347
357
 
348
358
  # Create temp stage to run fit.
349
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
359
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
350
360
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
351
361
  SqlResultValidator(
352
362
  session=session,
@@ -359,11 +369,12 @@ class Lars(BaseTransformer):
359
369
  expected_value=f"Stage area {transform_stage_name} successfully created."
360
370
  ).validate()
361
371
 
362
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
372
+ # Use posixpath to construct stage paths
373
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
374
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
363
375
  local_result_file_name = get_temp_file_path()
364
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
365
376
 
366
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
377
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
367
378
  statement_params = telemetry.get_function_usage_statement_params(
368
379
  project=_PROJECT,
369
380
  subproject=_SUBPROJECT,
@@ -389,6 +400,7 @@ class Lars(BaseTransformer):
389
400
  replace=True,
390
401
  session=session,
391
402
  statement_params=statement_params,
403
+ anonymous=True
392
404
  )
393
405
  def fit_wrapper_sproc(
394
406
  session: Session,
@@ -397,7 +409,8 @@ class Lars(BaseTransformer):
397
409
  stage_result_file_name: str,
398
410
  input_cols: List[str],
399
411
  label_cols: List[str],
400
- sample_weight_col: Optional[str]
412
+ sample_weight_col: Optional[str],
413
+ statement_params: Dict[str, str]
401
414
  ) -> str:
402
415
  import cloudpickle as cp
403
416
  import numpy as np
@@ -464,15 +477,15 @@ class Lars(BaseTransformer):
464
477
  api_calls=[Session.call],
465
478
  custom_tags=dict([("autogen", True)]),
466
479
  )
467
- sproc_export_file_name = session.call(
468
- fit_sproc_name,
480
+ sproc_export_file_name = fit_wrapper_sproc(
481
+ session,
469
482
  query,
470
483
  stage_transform_file_name,
471
484
  stage_result_file_name,
472
485
  identifier.get_unescaped_names(self.input_cols),
473
486
  identifier.get_unescaped_names(self.label_cols),
474
487
  identifier.get_unescaped_names(self.sample_weight_col),
475
- statement_params=statement_params,
488
+ statement_params,
476
489
  )
477
490
 
478
491
  if "|" in sproc_export_file_name:
@@ -482,7 +495,7 @@ class Lars(BaseTransformer):
482
495
  print("\n".join(fields[1:]))
483
496
 
484
497
  session.file.get(
485
- os.path.join(stage_result_file_name, sproc_export_file_name),
498
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
486
499
  local_result_file_name,
487
500
  statement_params=statement_params
488
501
  )
@@ -528,7 +541,7 @@ class Lars(BaseTransformer):
528
541
 
529
542
  # Register vectorized UDF for batch inference
530
543
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
531
- safe_id=self.id, method=inference_method)
544
+ safe_id=self._get_rand_id(), method=inference_method)
532
545
 
533
546
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
534
547
  # will try to pickle all of self which fails.
@@ -620,7 +633,7 @@ class Lars(BaseTransformer):
620
633
  return transformed_pandas_df.to_dict("records")
621
634
 
622
635
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
623
- safe_id=self.id
636
+ safe_id=self._get_rand_id()
624
637
  )
625
638
 
626
639
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -787,11 +800,18 @@ class Lars(BaseTransformer):
787
800
  Transformed dataset.
788
801
  """
789
802
  if isinstance(dataset, DataFrame):
803
+ expected_type_inferred = "float"
804
+ # when it is classifier, infer the datatype from label columns
805
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
806
+ expected_type_inferred = convert_sp_to_sf_type(
807
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
808
+ )
809
+
790
810
  output_df = self._batch_inference(
791
811
  dataset=dataset,
792
812
  inference_method="predict",
793
813
  expected_output_cols_list=self.output_cols,
794
- expected_output_cols_type="float",
814
+ expected_output_cols_type=expected_type_inferred,
795
815
  )
796
816
  elif isinstance(dataset, pd.DataFrame):
797
817
  output_df = self._sklearn_inference(
@@ -862,10 +882,10 @@ class Lars(BaseTransformer):
862
882
 
863
883
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
864
884
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
865
- Returns an empty list if current object is not a classifier or not yet fitted.
885
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
866
886
  """
867
887
  if getattr(self._sklearn_object, "classes_", None) is None:
868
- return []
888
+ return [output_cols_prefix]
869
889
 
870
890
  classes = self._sklearn_object.classes_
871
891
  if isinstance(classes, numpy.ndarray):
@@ -1090,7 +1110,7 @@ class Lars(BaseTransformer):
1090
1110
  cp.dump(self._sklearn_object, local_score_file)
1091
1111
 
1092
1112
  # Create temp stage to run score.
1093
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1113
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1094
1114
  session = dataset._session
1095
1115
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1096
1116
  SqlResultValidator(
@@ -1104,8 +1124,9 @@ class Lars(BaseTransformer):
1104
1124
  expected_value=f"Stage area {score_stage_name} successfully created."
1105
1125
  ).validate()
1106
1126
 
1107
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1108
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1127
+ # Use posixpath to construct stage paths
1128
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1129
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1109
1130
  statement_params = telemetry.get_function_usage_statement_params(
1110
1131
  project=_PROJECT,
1111
1132
  subproject=_SUBPROJECT,
@@ -1131,6 +1152,7 @@ class Lars(BaseTransformer):
1131
1152
  replace=True,
1132
1153
  session=session,
1133
1154
  statement_params=statement_params,
1155
+ anonymous=True
1134
1156
  )
1135
1157
  def score_wrapper_sproc(
1136
1158
  session: Session,
@@ -1138,7 +1160,8 @@ class Lars(BaseTransformer):
1138
1160
  stage_score_file_name: str,
1139
1161
  input_cols: List[str],
1140
1162
  label_cols: List[str],
1141
- sample_weight_col: Optional[str]
1163
+ sample_weight_col: Optional[str],
1164
+ statement_params: Dict[str, str]
1142
1165
  ) -> float:
1143
1166
  import cloudpickle as cp
1144
1167
  import numpy as np
@@ -1188,14 +1211,14 @@ class Lars(BaseTransformer):
1188
1211
  api_calls=[Session.call],
1189
1212
  custom_tags=dict([("autogen", True)]),
1190
1213
  )
1191
- score = session.call(
1192
- score_sproc_name,
1214
+ score = score_wrapper_sproc(
1215
+ session,
1193
1216
  query,
1194
1217
  stage_score_file_name,
1195
1218
  identifier.get_unescaped_names(self.input_cols),
1196
1219
  identifier.get_unescaped_names(self.label_cols),
1197
1220
  identifier.get_unescaped_names(self.sample_weight_col),
1198
- statement_params=statement_params,
1221
+ statement_params,
1199
1222
  )
1200
1223
 
1201
1224
  cleanup_temp_files([local_score_file_name])
@@ -1213,18 +1236,20 @@ class Lars(BaseTransformer):
1213
1236
  if self._sklearn_object._estimator_type == 'classifier':
1214
1237
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1215
1238
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1216
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1239
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1240
+ ([] if self._drop_input_cols else inputs) + outputs)
1217
1241
  # For regressor, the type of predict is float64
1218
1242
  elif self._sklearn_object._estimator_type == 'regressor':
1219
1243
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1220
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1221
-
1244
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1245
+ ([] if self._drop_input_cols else inputs) + outputs)
1222
1246
  for prob_func in PROB_FUNCTIONS:
1223
1247
  if hasattr(self, prob_func):
1224
1248
  output_cols_prefix: str = f"{prob_func}_"
1225
1249
  output_column_names = self._get_output_column_names(output_cols_prefix)
1226
1250
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1227
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1251
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1252
+ ([] if self._drop_input_cols else inputs) + outputs)
1228
1253
 
1229
1254
  @property
1230
1255
  def model_signatures(self) -> Dict[str, ModelSignature]: