snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -260,7 +262,6 @@ class LassoLarsCV(BaseTransformer):
260
262
  sample_weight_col: Optional[str] = None,
261
263
  ) -> None:
262
264
  super().__init__()
263
- self.id = str(uuid4()).replace("-", "_").upper()
264
265
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
265
266
 
266
267
  self._deps = list(deps)
@@ -290,6 +291,15 @@ class LassoLarsCV(BaseTransformer):
290
291
  self.set_drop_input_cols(drop_input_cols)
291
292
  self.set_sample_weight_col(sample_weight_col)
292
293
 
294
+ def _get_rand_id(self) -> str:
295
+ """
296
+ Generate random id to be used in sproc and stage names.
297
+
298
+ Returns:
299
+ Random id string usable in sproc, table, and stage names.
300
+ """
301
+ return str(uuid4()).replace("-", "_").upper()
302
+
293
303
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
294
304
  """
295
305
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -368,7 +378,7 @@ class LassoLarsCV(BaseTransformer):
368
378
  cp.dump(self._sklearn_object, local_transform_file)
369
379
 
370
380
  # Create temp stage to run fit.
371
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
381
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
372
382
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
373
383
  SqlResultValidator(
374
384
  session=session,
@@ -381,11 +391,12 @@ class LassoLarsCV(BaseTransformer):
381
391
  expected_value=f"Stage area {transform_stage_name} successfully created."
382
392
  ).validate()
383
393
 
384
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
394
+ # Use posixpath to construct stage paths
395
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
396
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
385
397
  local_result_file_name = get_temp_file_path()
386
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
387
398
 
388
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
399
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
389
400
  statement_params = telemetry.get_function_usage_statement_params(
390
401
  project=_PROJECT,
391
402
  subproject=_SUBPROJECT,
@@ -411,6 +422,7 @@ class LassoLarsCV(BaseTransformer):
411
422
  replace=True,
412
423
  session=session,
413
424
  statement_params=statement_params,
425
+ anonymous=True
414
426
  )
415
427
  def fit_wrapper_sproc(
416
428
  session: Session,
@@ -419,7 +431,8 @@ class LassoLarsCV(BaseTransformer):
419
431
  stage_result_file_name: str,
420
432
  input_cols: List[str],
421
433
  label_cols: List[str],
422
- sample_weight_col: Optional[str]
434
+ sample_weight_col: Optional[str],
435
+ statement_params: Dict[str, str]
423
436
  ) -> str:
424
437
  import cloudpickle as cp
425
438
  import numpy as np
@@ -486,15 +499,15 @@ class LassoLarsCV(BaseTransformer):
486
499
  api_calls=[Session.call],
487
500
  custom_tags=dict([("autogen", True)]),
488
501
  )
489
- sproc_export_file_name = session.call(
490
- fit_sproc_name,
502
+ sproc_export_file_name = fit_wrapper_sproc(
503
+ session,
491
504
  query,
492
505
  stage_transform_file_name,
493
506
  stage_result_file_name,
494
507
  identifier.get_unescaped_names(self.input_cols),
495
508
  identifier.get_unescaped_names(self.label_cols),
496
509
  identifier.get_unescaped_names(self.sample_weight_col),
497
- statement_params=statement_params,
510
+ statement_params,
498
511
  )
499
512
 
500
513
  if "|" in sproc_export_file_name:
@@ -504,7 +517,7 @@ class LassoLarsCV(BaseTransformer):
504
517
  print("\n".join(fields[1:]))
505
518
 
506
519
  session.file.get(
507
- os.path.join(stage_result_file_name, sproc_export_file_name),
520
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
508
521
  local_result_file_name,
509
522
  statement_params=statement_params
510
523
  )
@@ -550,7 +563,7 @@ class LassoLarsCV(BaseTransformer):
550
563
 
551
564
  # Register vectorized UDF for batch inference
552
565
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
553
- safe_id=self.id, method=inference_method)
566
+ safe_id=self._get_rand_id(), method=inference_method)
554
567
 
555
568
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
556
569
  # will try to pickle all of self which fails.
@@ -642,7 +655,7 @@ class LassoLarsCV(BaseTransformer):
642
655
  return transformed_pandas_df.to_dict("records")
643
656
 
644
657
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
645
- safe_id=self.id
658
+ safe_id=self._get_rand_id()
646
659
  )
647
660
 
648
661
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -809,11 +822,18 @@ class LassoLarsCV(BaseTransformer):
809
822
  Transformed dataset.
810
823
  """
811
824
  if isinstance(dataset, DataFrame):
825
+ expected_type_inferred = "float"
826
+ # when it is classifier, infer the datatype from label columns
827
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
828
+ expected_type_inferred = convert_sp_to_sf_type(
829
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
830
+ )
831
+
812
832
  output_df = self._batch_inference(
813
833
  dataset=dataset,
814
834
  inference_method="predict",
815
835
  expected_output_cols_list=self.output_cols,
816
- expected_output_cols_type="float",
836
+ expected_output_cols_type=expected_type_inferred,
817
837
  )
818
838
  elif isinstance(dataset, pd.DataFrame):
819
839
  output_df = self._sklearn_inference(
@@ -884,10 +904,10 @@ class LassoLarsCV(BaseTransformer):
884
904
 
885
905
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
886
906
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
887
- Returns an empty list if current object is not a classifier or not yet fitted.
907
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
888
908
  """
889
909
  if getattr(self._sklearn_object, "classes_", None) is None:
890
- return []
910
+ return [output_cols_prefix]
891
911
 
892
912
  classes = self._sklearn_object.classes_
893
913
  if isinstance(classes, numpy.ndarray):
@@ -1112,7 +1132,7 @@ class LassoLarsCV(BaseTransformer):
1112
1132
  cp.dump(self._sklearn_object, local_score_file)
1113
1133
 
1114
1134
  # Create temp stage to run score.
1115
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1135
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1116
1136
  session = dataset._session
1117
1137
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1118
1138
  SqlResultValidator(
@@ -1126,8 +1146,9 @@ class LassoLarsCV(BaseTransformer):
1126
1146
  expected_value=f"Stage area {score_stage_name} successfully created."
1127
1147
  ).validate()
1128
1148
 
1129
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1130
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1149
+ # Use posixpath to construct stage paths
1150
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1151
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1131
1152
  statement_params = telemetry.get_function_usage_statement_params(
1132
1153
  project=_PROJECT,
1133
1154
  subproject=_SUBPROJECT,
@@ -1153,6 +1174,7 @@ class LassoLarsCV(BaseTransformer):
1153
1174
  replace=True,
1154
1175
  session=session,
1155
1176
  statement_params=statement_params,
1177
+ anonymous=True
1156
1178
  )
1157
1179
  def score_wrapper_sproc(
1158
1180
  session: Session,
@@ -1160,7 +1182,8 @@ class LassoLarsCV(BaseTransformer):
1160
1182
  stage_score_file_name: str,
1161
1183
  input_cols: List[str],
1162
1184
  label_cols: List[str],
1163
- sample_weight_col: Optional[str]
1185
+ sample_weight_col: Optional[str],
1186
+ statement_params: Dict[str, str]
1164
1187
  ) -> float:
1165
1188
  import cloudpickle as cp
1166
1189
  import numpy as np
@@ -1210,14 +1233,14 @@ class LassoLarsCV(BaseTransformer):
1210
1233
  api_calls=[Session.call],
1211
1234
  custom_tags=dict([("autogen", True)]),
1212
1235
  )
1213
- score = session.call(
1214
- score_sproc_name,
1236
+ score = score_wrapper_sproc(
1237
+ session,
1215
1238
  query,
1216
1239
  stage_score_file_name,
1217
1240
  identifier.get_unescaped_names(self.input_cols),
1218
1241
  identifier.get_unescaped_names(self.label_cols),
1219
1242
  identifier.get_unescaped_names(self.sample_weight_col),
1220
- statement_params=statement_params,
1243
+ statement_params,
1221
1244
  )
1222
1245
 
1223
1246
  cleanup_temp_files([local_score_file_name])
@@ -1235,18 +1258,20 @@ class LassoLarsCV(BaseTransformer):
1235
1258
  if self._sklearn_object._estimator_type == 'classifier':
1236
1259
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1237
1260
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1238
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1261
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1262
+ ([] if self._drop_input_cols else inputs) + outputs)
1239
1263
  # For regressor, the type of predict is float64
1240
1264
  elif self._sklearn_object._estimator_type == 'regressor':
1241
1265
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1242
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1243
-
1266
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1267
+ ([] if self._drop_input_cols else inputs) + outputs)
1244
1268
  for prob_func in PROB_FUNCTIONS:
1245
1269
  if hasattr(self, prob_func):
1246
1270
  output_cols_prefix: str = f"{prob_func}_"
1247
1271
  output_column_names = self._get_output_column_names(output_cols_prefix)
1248
1272
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1249
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1273
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1274
+ ([] if self._drop_input_cols else inputs) + outputs)
1250
1275
 
1251
1276
  @property
1252
1277
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -244,7 +246,6 @@ class LassoLarsIC(BaseTransformer):
244
246
  sample_weight_col: Optional[str] = None,
245
247
  ) -> None:
246
248
  super().__init__()
247
- self.id = str(uuid4()).replace("-", "_").upper()
248
249
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
249
250
 
250
251
  self._deps = list(deps)
@@ -273,6 +274,15 @@ class LassoLarsIC(BaseTransformer):
273
274
  self.set_drop_input_cols(drop_input_cols)
274
275
  self.set_sample_weight_col(sample_weight_col)
275
276
 
277
+ def _get_rand_id(self) -> str:
278
+ """
279
+ Generate random id to be used in sproc and stage names.
280
+
281
+ Returns:
282
+ Random id string usable in sproc, table, and stage names.
283
+ """
284
+ return str(uuid4()).replace("-", "_").upper()
285
+
276
286
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
277
287
  """
278
288
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -351,7 +361,7 @@ class LassoLarsIC(BaseTransformer):
351
361
  cp.dump(self._sklearn_object, local_transform_file)
352
362
 
353
363
  # Create temp stage to run fit.
354
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
364
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
355
365
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
356
366
  SqlResultValidator(
357
367
  session=session,
@@ -364,11 +374,12 @@ class LassoLarsIC(BaseTransformer):
364
374
  expected_value=f"Stage area {transform_stage_name} successfully created."
365
375
  ).validate()
366
376
 
367
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
377
+ # Use posixpath to construct stage paths
378
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
379
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
368
380
  local_result_file_name = get_temp_file_path()
369
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
370
381
 
371
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
382
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
372
383
  statement_params = telemetry.get_function_usage_statement_params(
373
384
  project=_PROJECT,
374
385
  subproject=_SUBPROJECT,
@@ -394,6 +405,7 @@ class LassoLarsIC(BaseTransformer):
394
405
  replace=True,
395
406
  session=session,
396
407
  statement_params=statement_params,
408
+ anonymous=True
397
409
  )
398
410
  def fit_wrapper_sproc(
399
411
  session: Session,
@@ -402,7 +414,8 @@ class LassoLarsIC(BaseTransformer):
402
414
  stage_result_file_name: str,
403
415
  input_cols: List[str],
404
416
  label_cols: List[str],
405
- sample_weight_col: Optional[str]
417
+ sample_weight_col: Optional[str],
418
+ statement_params: Dict[str, str]
406
419
  ) -> str:
407
420
  import cloudpickle as cp
408
421
  import numpy as np
@@ -469,15 +482,15 @@ class LassoLarsIC(BaseTransformer):
469
482
  api_calls=[Session.call],
470
483
  custom_tags=dict([("autogen", True)]),
471
484
  )
472
- sproc_export_file_name = session.call(
473
- fit_sproc_name,
485
+ sproc_export_file_name = fit_wrapper_sproc(
486
+ session,
474
487
  query,
475
488
  stage_transform_file_name,
476
489
  stage_result_file_name,
477
490
  identifier.get_unescaped_names(self.input_cols),
478
491
  identifier.get_unescaped_names(self.label_cols),
479
492
  identifier.get_unescaped_names(self.sample_weight_col),
480
- statement_params=statement_params,
493
+ statement_params,
481
494
  )
482
495
 
483
496
  if "|" in sproc_export_file_name:
@@ -487,7 +500,7 @@ class LassoLarsIC(BaseTransformer):
487
500
  print("\n".join(fields[1:]))
488
501
 
489
502
  session.file.get(
490
- os.path.join(stage_result_file_name, sproc_export_file_name),
503
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
491
504
  local_result_file_name,
492
505
  statement_params=statement_params
493
506
  )
@@ -533,7 +546,7 @@ class LassoLarsIC(BaseTransformer):
533
546
 
534
547
  # Register vectorized UDF for batch inference
535
548
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
536
- safe_id=self.id, method=inference_method)
549
+ safe_id=self._get_rand_id(), method=inference_method)
537
550
 
538
551
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
539
552
  # will try to pickle all of self which fails.
@@ -625,7 +638,7 @@ class LassoLarsIC(BaseTransformer):
625
638
  return transformed_pandas_df.to_dict("records")
626
639
 
627
640
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
628
- safe_id=self.id
641
+ safe_id=self._get_rand_id()
629
642
  )
630
643
 
631
644
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -792,11 +805,18 @@ class LassoLarsIC(BaseTransformer):
792
805
  Transformed dataset.
793
806
  """
794
807
  if isinstance(dataset, DataFrame):
808
+ expected_type_inferred = "float"
809
+ # when it is classifier, infer the datatype from label columns
810
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
811
+ expected_type_inferred = convert_sp_to_sf_type(
812
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
813
+ )
814
+
795
815
  output_df = self._batch_inference(
796
816
  dataset=dataset,
797
817
  inference_method="predict",
798
818
  expected_output_cols_list=self.output_cols,
799
- expected_output_cols_type="float",
819
+ expected_output_cols_type=expected_type_inferred,
800
820
  )
801
821
  elif isinstance(dataset, pd.DataFrame):
802
822
  output_df = self._sklearn_inference(
@@ -867,10 +887,10 @@ class LassoLarsIC(BaseTransformer):
867
887
 
868
888
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
869
889
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
870
- Returns an empty list if current object is not a classifier or not yet fitted.
890
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
871
891
  """
872
892
  if getattr(self._sklearn_object, "classes_", None) is None:
873
- return []
893
+ return [output_cols_prefix]
874
894
 
875
895
  classes = self._sklearn_object.classes_
876
896
  if isinstance(classes, numpy.ndarray):
@@ -1095,7 +1115,7 @@ class LassoLarsIC(BaseTransformer):
1095
1115
  cp.dump(self._sklearn_object, local_score_file)
1096
1116
 
1097
1117
  # Create temp stage to run score.
1098
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1118
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1099
1119
  session = dataset._session
1100
1120
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1101
1121
  SqlResultValidator(
@@ -1109,8 +1129,9 @@ class LassoLarsIC(BaseTransformer):
1109
1129
  expected_value=f"Stage area {score_stage_name} successfully created."
1110
1130
  ).validate()
1111
1131
 
1112
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1113
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1132
+ # Use posixpath to construct stage paths so the separator is always "/", regardless of the client OS.
1133
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1134
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1114
1135
  statement_params = telemetry.get_function_usage_statement_params(
1115
1136
  project=_PROJECT,
1116
1137
  subproject=_SUBPROJECT,
@@ -1136,6 +1157,7 @@ class LassoLarsIC(BaseTransformer):
1136
1157
  replace=True,
1137
1158
  session=session,
1138
1159
  statement_params=statement_params,
1160
+ anonymous=True
1139
1161
  )
1140
1162
  def score_wrapper_sproc(
1141
1163
  session: Session,
@@ -1143,7 +1165,8 @@ class LassoLarsIC(BaseTransformer):
1143
1165
  stage_score_file_name: str,
1144
1166
  input_cols: List[str],
1145
1167
  label_cols: List[str],
1146
- sample_weight_col: Optional[str]
1168
+ sample_weight_col: Optional[str],
1169
+ statement_params: Dict[str, str]
1147
1170
  ) -> float:
1148
1171
  import cloudpickle as cp
1149
1172
  import numpy as np
@@ -1193,14 +1216,14 @@ class LassoLarsIC(BaseTransformer):
1193
1216
  api_calls=[Session.call],
1194
1217
  custom_tags=dict([("autogen", True)]),
1195
1218
  )
1196
- score = session.call(
1197
- score_sproc_name,
1219
+ score = score_wrapper_sproc(
1220
+ session,
1198
1221
  query,
1199
1222
  stage_score_file_name,
1200
1223
  identifier.get_unescaped_names(self.input_cols),
1201
1224
  identifier.get_unescaped_names(self.label_cols),
1202
1225
  identifier.get_unescaped_names(self.sample_weight_col),
1203
- statement_params=statement_params,
1226
+ statement_params,
1204
1227
  )
1205
1228
 
1206
1229
  cleanup_temp_files([local_score_file_name])
@@ -1218,18 +1241,20 @@ class LassoLarsIC(BaseTransformer):
1218
1241
  if self._sklearn_object._estimator_type == 'classifier':
1219
1242
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1220
1243
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1221
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1244
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1245
+ ([] if self._drop_input_cols else inputs) + outputs)
1222
1246
  # For regressor, the type of predict is float64
1223
1247
  elif self._sklearn_object._estimator_type == 'regressor':
1224
1248
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1225
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1226
-
1249
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1250
+ ([] if self._drop_input_cols else inputs) + outputs)
1227
1251
  for prob_func in PROB_FUNCTIONS:
1228
1252
  if hasattr(self, prob_func):
1229
1253
  output_cols_prefix: str = f"{prob_func}_"
1230
1254
  output_column_names = self._get_output_column_names(output_cols_prefix)
1231
1255
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1232
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1256
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1257
+ ([] if self._drop_input_cols else inputs) + outputs)
1233
1258
 
1234
1259
  @property
1235
1260
  def model_signatures(self) -> Dict[str, ModelSignature]: