snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -264,7 +266,6 @@ class LassoCV(BaseTransformer):
264
266
  sample_weight_col: Optional[str] = None,
265
267
  ) -> None:
266
268
  super().__init__()
267
- self.id = str(uuid4()).replace("-", "_").upper()
268
269
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
269
270
 
270
271
  self._deps = list(deps)
@@ -297,6 +298,15 @@ class LassoCV(BaseTransformer):
297
298
  self.set_drop_input_cols(drop_input_cols)
298
299
  self.set_sample_weight_col(sample_weight_col)
299
300
 
301
+ def _get_rand_id(self) -> str:
302
+ """
303
+ Generate random id to be used in sproc and stage names.
304
+
305
+ Returns:
306
+ Random id string usable in sproc, table, and stage names.
307
+ """
308
+ return str(uuid4()).replace("-", "_").upper()
309
+
300
310
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
301
311
  """
302
312
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -375,7 +385,7 @@ class LassoCV(BaseTransformer):
375
385
  cp.dump(self._sklearn_object, local_transform_file)
376
386
 
377
387
  # Create temp stage to run fit.
378
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
388
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
379
389
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
380
390
  SqlResultValidator(
381
391
  session=session,
@@ -388,11 +398,12 @@ class LassoCV(BaseTransformer):
388
398
  expected_value=f"Stage area {transform_stage_name} successfully created."
389
399
  ).validate()
390
400
 
391
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
401
+ # Use posixpath to construct stage paths
402
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
403
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
392
404
  local_result_file_name = get_temp_file_path()
393
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
394
405
 
395
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
406
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
396
407
  statement_params = telemetry.get_function_usage_statement_params(
397
408
  project=_PROJECT,
398
409
  subproject=_SUBPROJECT,
@@ -418,6 +429,7 @@ class LassoCV(BaseTransformer):
418
429
  replace=True,
419
430
  session=session,
420
431
  statement_params=statement_params,
432
+ anonymous=True
421
433
  )
422
434
  def fit_wrapper_sproc(
423
435
  session: Session,
@@ -426,7 +438,8 @@ class LassoCV(BaseTransformer):
426
438
  stage_result_file_name: str,
427
439
  input_cols: List[str],
428
440
  label_cols: List[str],
429
- sample_weight_col: Optional[str]
441
+ sample_weight_col: Optional[str],
442
+ statement_params: Dict[str, str]
430
443
  ) -> str:
431
444
  import cloudpickle as cp
432
445
  import numpy as np
@@ -493,15 +506,15 @@ class LassoCV(BaseTransformer):
493
506
  api_calls=[Session.call],
494
507
  custom_tags=dict([("autogen", True)]),
495
508
  )
496
- sproc_export_file_name = session.call(
497
- fit_sproc_name,
509
+ sproc_export_file_name = fit_wrapper_sproc(
510
+ session,
498
511
  query,
499
512
  stage_transform_file_name,
500
513
  stage_result_file_name,
501
514
  identifier.get_unescaped_names(self.input_cols),
502
515
  identifier.get_unescaped_names(self.label_cols),
503
516
  identifier.get_unescaped_names(self.sample_weight_col),
504
- statement_params=statement_params,
517
+ statement_params,
505
518
  )
506
519
 
507
520
  if "|" in sproc_export_file_name:
@@ -511,7 +524,7 @@ class LassoCV(BaseTransformer):
511
524
  print("\n".join(fields[1:]))
512
525
 
513
526
  session.file.get(
514
- os.path.join(stage_result_file_name, sproc_export_file_name),
527
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
515
528
  local_result_file_name,
516
529
  statement_params=statement_params
517
530
  )
@@ -557,7 +570,7 @@ class LassoCV(BaseTransformer):
557
570
 
558
571
  # Register vectorized UDF for batch inference
559
572
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
560
- safe_id=self.id, method=inference_method)
573
+ safe_id=self._get_rand_id(), method=inference_method)
561
574
 
562
575
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
563
576
  # will try to pickle all of self which fails.
@@ -649,7 +662,7 @@ class LassoCV(BaseTransformer):
649
662
  return transformed_pandas_df.to_dict("records")
650
663
 
651
664
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
652
- safe_id=self.id
665
+ safe_id=self._get_rand_id()
653
666
  )
654
667
 
655
668
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -816,11 +829,18 @@ class LassoCV(BaseTransformer):
816
829
  Transformed dataset.
817
830
  """
818
831
  if isinstance(dataset, DataFrame):
832
+ expected_type_inferred = "float"
833
+ # when it is classifier, infer the datatype from label columns
834
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
835
+ expected_type_inferred = convert_sp_to_sf_type(
836
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
837
+ )
838
+
819
839
  output_df = self._batch_inference(
820
840
  dataset=dataset,
821
841
  inference_method="predict",
822
842
  expected_output_cols_list=self.output_cols,
823
- expected_output_cols_type="float",
843
+ expected_output_cols_type=expected_type_inferred,
824
844
  )
825
845
  elif isinstance(dataset, pd.DataFrame):
826
846
  output_df = self._sklearn_inference(
@@ -891,10 +911,10 @@ class LassoCV(BaseTransformer):
891
911
 
892
912
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
893
913
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
894
- Returns an empty list if current object is not a classifier or not yet fitted.
914
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
895
915
  """
896
916
  if getattr(self._sklearn_object, "classes_", None) is None:
897
- return []
917
+ return [output_cols_prefix]
898
918
 
899
919
  classes = self._sklearn_object.classes_
900
920
  if isinstance(classes, numpy.ndarray):
@@ -1119,7 +1139,7 @@ class LassoCV(BaseTransformer):
1119
1139
  cp.dump(self._sklearn_object, local_score_file)
1120
1140
 
1121
1141
  # Create temp stage to run score.
1122
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1142
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1123
1143
  session = dataset._session
1124
1144
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1125
1145
  SqlResultValidator(
@@ -1133,8 +1153,9 @@ class LassoCV(BaseTransformer):
1133
1153
  expected_value=f"Stage area {score_stage_name} successfully created."
1134
1154
  ).validate()
1135
1155
 
1136
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1137
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1156
+ # Use posixpath to construct stage paths
1157
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1158
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1138
1159
  statement_params = telemetry.get_function_usage_statement_params(
1139
1160
  project=_PROJECT,
1140
1161
  subproject=_SUBPROJECT,
@@ -1160,6 +1181,7 @@ class LassoCV(BaseTransformer):
1160
1181
  replace=True,
1161
1182
  session=session,
1162
1183
  statement_params=statement_params,
1184
+ anonymous=True
1163
1185
  )
1164
1186
  def score_wrapper_sproc(
1165
1187
  session: Session,
@@ -1167,7 +1189,8 @@ class LassoCV(BaseTransformer):
1167
1189
  stage_score_file_name: str,
1168
1190
  input_cols: List[str],
1169
1191
  label_cols: List[str],
1170
- sample_weight_col: Optional[str]
1192
+ sample_weight_col: Optional[str],
1193
+ statement_params: Dict[str, str]
1171
1194
  ) -> float:
1172
1195
  import cloudpickle as cp
1173
1196
  import numpy as np
@@ -1217,14 +1240,14 @@ class LassoCV(BaseTransformer):
1217
1240
  api_calls=[Session.call],
1218
1241
  custom_tags=dict([("autogen", True)]),
1219
1242
  )
1220
- score = session.call(
1221
- score_sproc_name,
1243
+ score = score_wrapper_sproc(
1244
+ session,
1222
1245
  query,
1223
1246
  stage_score_file_name,
1224
1247
  identifier.get_unescaped_names(self.input_cols),
1225
1248
  identifier.get_unescaped_names(self.label_cols),
1226
1249
  identifier.get_unescaped_names(self.sample_weight_col),
1227
- statement_params=statement_params,
1250
+ statement_params,
1228
1251
  )
1229
1252
 
1230
1253
  cleanup_temp_files([local_score_file_name])
@@ -1242,18 +1265,20 @@ class LassoCV(BaseTransformer):
1242
1265
  if self._sklearn_object._estimator_type == 'classifier':
1243
1266
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1244
1267
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1245
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1268
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1269
+ ([] if self._drop_input_cols else inputs) + outputs)
1246
1270
  # For regressor, the type of predict is float64
1247
1271
  elif self._sklearn_object._estimator_type == 'regressor':
1248
1272
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1249
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1250
-
1273
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1274
+ ([] if self._drop_input_cols else inputs) + outputs)
1251
1275
  for prob_func in PROB_FUNCTIONS:
1252
1276
  if hasattr(self, prob_func):
1253
1277
  output_cols_prefix: str = f"{prob_func}_"
1254
1278
  output_column_names = self._get_output_column_names(output_cols_prefix)
1255
1279
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1256
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1280
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1281
+ ([] if self._drop_input_cols else inputs) + outputs)
1257
1282
 
1258
1283
  @property
1259
1284
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -258,7 +260,6 @@ class LassoLars(BaseTransformer):
258
260
  sample_weight_col: Optional[str] = None,
259
261
  ) -> None:
260
262
  super().__init__()
261
- self.id = str(uuid4()).replace("-", "_").upper()
262
263
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
263
264
 
264
265
  self._deps = list(deps)
@@ -289,6 +290,15 @@ class LassoLars(BaseTransformer):
289
290
  self.set_drop_input_cols(drop_input_cols)
290
291
  self.set_sample_weight_col(sample_weight_col)
291
292
 
293
+ def _get_rand_id(self) -> str:
294
+ """
295
+ Generate random id to be used in sproc and stage names.
296
+
297
+ Returns:
298
+ Random id string usable in sproc, table, and stage names.
299
+ """
300
+ return str(uuid4()).replace("-", "_").upper()
301
+
292
302
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
293
303
  """
294
304
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -367,7 +377,7 @@ class LassoLars(BaseTransformer):
367
377
  cp.dump(self._sklearn_object, local_transform_file)
368
378
 
369
379
  # Create temp stage to run fit.
370
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
380
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
371
381
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
372
382
  SqlResultValidator(
373
383
  session=session,
@@ -380,11 +390,12 @@ class LassoLars(BaseTransformer):
380
390
  expected_value=f"Stage area {transform_stage_name} successfully created."
381
391
  ).validate()
382
392
 
383
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
393
+ # Use posixpath to construct stage paths
394
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
395
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
384
396
  local_result_file_name = get_temp_file_path()
385
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
386
397
 
387
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
398
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
388
399
  statement_params = telemetry.get_function_usage_statement_params(
389
400
  project=_PROJECT,
390
401
  subproject=_SUBPROJECT,
@@ -410,6 +421,7 @@ class LassoLars(BaseTransformer):
410
421
  replace=True,
411
422
  session=session,
412
423
  statement_params=statement_params,
424
+ anonymous=True
413
425
  )
414
426
  def fit_wrapper_sproc(
415
427
  session: Session,
@@ -418,7 +430,8 @@ class LassoLars(BaseTransformer):
418
430
  stage_result_file_name: str,
419
431
  input_cols: List[str],
420
432
  label_cols: List[str],
421
- sample_weight_col: Optional[str]
433
+ sample_weight_col: Optional[str],
434
+ statement_params: Dict[str, str]
422
435
  ) -> str:
423
436
  import cloudpickle as cp
424
437
  import numpy as np
@@ -485,15 +498,15 @@ class LassoLars(BaseTransformer):
485
498
  api_calls=[Session.call],
486
499
  custom_tags=dict([("autogen", True)]),
487
500
  )
488
- sproc_export_file_name = session.call(
489
- fit_sproc_name,
501
+ sproc_export_file_name = fit_wrapper_sproc(
502
+ session,
490
503
  query,
491
504
  stage_transform_file_name,
492
505
  stage_result_file_name,
493
506
  identifier.get_unescaped_names(self.input_cols),
494
507
  identifier.get_unescaped_names(self.label_cols),
495
508
  identifier.get_unescaped_names(self.sample_weight_col),
496
- statement_params=statement_params,
509
+ statement_params,
497
510
  )
498
511
 
499
512
  if "|" in sproc_export_file_name:
@@ -503,7 +516,7 @@ class LassoLars(BaseTransformer):
503
516
  print("\n".join(fields[1:]))
504
517
 
505
518
  session.file.get(
506
- os.path.join(stage_result_file_name, sproc_export_file_name),
519
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
507
520
  local_result_file_name,
508
521
  statement_params=statement_params
509
522
  )
@@ -549,7 +562,7 @@ class LassoLars(BaseTransformer):
549
562
 
550
563
  # Register vectorized UDF for batch inference
551
564
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
552
- safe_id=self.id, method=inference_method)
565
+ safe_id=self._get_rand_id(), method=inference_method)
553
566
 
554
567
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
555
568
  # will try to pickle all of self which fails.
@@ -641,7 +654,7 @@ class LassoLars(BaseTransformer):
641
654
  return transformed_pandas_df.to_dict("records")
642
655
 
643
656
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
644
- safe_id=self.id
657
+ safe_id=self._get_rand_id()
645
658
  )
646
659
 
647
660
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -808,11 +821,18 @@ class LassoLars(BaseTransformer):
808
821
  Transformed dataset.
809
822
  """
810
823
  if isinstance(dataset, DataFrame):
824
+ expected_type_inferred = "float"
825
+ # when it is classifier, infer the datatype from label columns
826
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
827
+ expected_type_inferred = convert_sp_to_sf_type(
828
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
829
+ )
830
+
811
831
  output_df = self._batch_inference(
812
832
  dataset=dataset,
813
833
  inference_method="predict",
814
834
  expected_output_cols_list=self.output_cols,
815
- expected_output_cols_type="float",
835
+ expected_output_cols_type=expected_type_inferred,
816
836
  )
817
837
  elif isinstance(dataset, pd.DataFrame):
818
838
  output_df = self._sklearn_inference(
@@ -883,10 +903,10 @@ class LassoLars(BaseTransformer):
883
903
 
884
904
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
885
905
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
886
- Returns an empty list if current object is not a classifier or not yet fitted.
906
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
887
907
  """
888
908
  if getattr(self._sklearn_object, "classes_", None) is None:
889
- return []
909
+ return [output_cols_prefix]
890
910
 
891
911
  classes = self._sklearn_object.classes_
892
912
  if isinstance(classes, numpy.ndarray):
@@ -1111,7 +1131,7 @@ class LassoLars(BaseTransformer):
1111
1131
  cp.dump(self._sklearn_object, local_score_file)
1112
1132
 
1113
1133
  # Create temp stage to run score.
1114
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1134
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1115
1135
  session = dataset._session
1116
1136
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1117
1137
  SqlResultValidator(
@@ -1125,8 +1145,9 @@ class LassoLars(BaseTransformer):
1125
1145
  expected_value=f"Stage area {score_stage_name} successfully created."
1126
1146
  ).validate()
1127
1147
 
1128
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1129
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1148
+ # Use posixpath to construct stage paths
1149
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1150
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1130
1151
  statement_params = telemetry.get_function_usage_statement_params(
1131
1152
  project=_PROJECT,
1132
1153
  subproject=_SUBPROJECT,
@@ -1152,6 +1173,7 @@ class LassoLars(BaseTransformer):
1152
1173
  replace=True,
1153
1174
  session=session,
1154
1175
  statement_params=statement_params,
1176
+ anonymous=True
1155
1177
  )
1156
1178
  def score_wrapper_sproc(
1157
1179
  session: Session,
@@ -1159,7 +1181,8 @@ class LassoLars(BaseTransformer):
1159
1181
  stage_score_file_name: str,
1160
1182
  input_cols: List[str],
1161
1183
  label_cols: List[str],
1162
- sample_weight_col: Optional[str]
1184
+ sample_weight_col: Optional[str],
1185
+ statement_params: Dict[str, str]
1163
1186
  ) -> float:
1164
1187
  import cloudpickle as cp
1165
1188
  import numpy as np
@@ -1209,14 +1232,14 @@ class LassoLars(BaseTransformer):
1209
1232
  api_calls=[Session.call],
1210
1233
  custom_tags=dict([("autogen", True)]),
1211
1234
  )
1212
- score = session.call(
1213
- score_sproc_name,
1235
+ score = score_wrapper_sproc(
1236
+ session,
1214
1237
  query,
1215
1238
  stage_score_file_name,
1216
1239
  identifier.get_unescaped_names(self.input_cols),
1217
1240
  identifier.get_unescaped_names(self.label_cols),
1218
1241
  identifier.get_unescaped_names(self.sample_weight_col),
1219
- statement_params=statement_params,
1242
+ statement_params,
1220
1243
  )
1221
1244
 
1222
1245
  cleanup_temp_files([local_score_file_name])
@@ -1234,18 +1257,20 @@ class LassoLars(BaseTransformer):
1234
1257
  if self._sklearn_object._estimator_type == 'classifier':
1235
1258
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1236
1259
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1237
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1260
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1261
+ ([] if self._drop_input_cols else inputs) + outputs)
1238
1262
  # For regressor, the type of predict is float64
1239
1263
  elif self._sklearn_object._estimator_type == 'regressor':
1240
1264
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1241
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1242
-
1265
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1266
+ ([] if self._drop_input_cols else inputs) + outputs)
1243
1267
  for prob_func in PROB_FUNCTIONS:
1244
1268
  if hasattr(self, prob_func):
1245
1269
  output_cols_prefix: str = f"{prob_func}_"
1246
1270
  output_column_names = self._get_output_column_names(output_cols_prefix)
1247
1271
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1248
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1272
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1273
+ ([] if self._drop_input_cols else inputs) + outputs)
1249
1274
 
1250
1275
  @property
1251
1276
  def model_signatures(self) -> Dict[str, ModelSignature]: