snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -247,7 +249,6 @@ class LarsCV(BaseTransformer):
247
249
  sample_weight_col: Optional[str] = None,
248
250
  ) -> None:
249
251
  super().__init__()
250
- self.id = str(uuid4()).replace("-", "_").upper()
251
252
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
252
253
 
253
254
  self._deps = list(deps)
@@ -276,6 +277,15 @@ class LarsCV(BaseTransformer):
276
277
  self.set_drop_input_cols(drop_input_cols)
277
278
  self.set_sample_weight_col(sample_weight_col)
278
279
 
280
+ def _get_rand_id(self) -> str:
281
+ """
282
+ Generate random id to be used in sproc and stage names.
283
+
284
+ Returns:
285
+ Random id string usable in sproc, table, and stage names.
286
+ """
287
+ return str(uuid4()).replace("-", "_").upper()
288
+
279
289
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
280
290
  """
281
291
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -354,7 +364,7 @@ class LarsCV(BaseTransformer):
354
364
  cp.dump(self._sklearn_object, local_transform_file)
355
365
 
356
366
  # Create temp stage to run fit.
357
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
367
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
358
368
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
359
369
  SqlResultValidator(
360
370
  session=session,
@@ -367,11 +377,12 @@ class LarsCV(BaseTransformer):
367
377
  expected_value=f"Stage area {transform_stage_name} successfully created."
368
378
  ).validate()
369
379
 
370
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
380
+ # Use posixpath to construct stage paths
381
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
382
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
371
383
  local_result_file_name = get_temp_file_path()
372
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
373
384
 
374
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
385
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
375
386
  statement_params = telemetry.get_function_usage_statement_params(
376
387
  project=_PROJECT,
377
388
  subproject=_SUBPROJECT,
@@ -397,6 +408,7 @@ class LarsCV(BaseTransformer):
397
408
  replace=True,
398
409
  session=session,
399
410
  statement_params=statement_params,
411
+ anonymous=True
400
412
  )
401
413
  def fit_wrapper_sproc(
402
414
  session: Session,
@@ -405,7 +417,8 @@ class LarsCV(BaseTransformer):
405
417
  stage_result_file_name: str,
406
418
  input_cols: List[str],
407
419
  label_cols: List[str],
408
- sample_weight_col: Optional[str]
420
+ sample_weight_col: Optional[str],
421
+ statement_params: Dict[str, str]
409
422
  ) -> str:
410
423
  import cloudpickle as cp
411
424
  import numpy as np
@@ -472,15 +485,15 @@ class LarsCV(BaseTransformer):
472
485
  api_calls=[Session.call],
473
486
  custom_tags=dict([("autogen", True)]),
474
487
  )
475
- sproc_export_file_name = session.call(
476
- fit_sproc_name,
488
+ sproc_export_file_name = fit_wrapper_sproc(
489
+ session,
477
490
  query,
478
491
  stage_transform_file_name,
479
492
  stage_result_file_name,
480
493
  identifier.get_unescaped_names(self.input_cols),
481
494
  identifier.get_unescaped_names(self.label_cols),
482
495
  identifier.get_unescaped_names(self.sample_weight_col),
483
- statement_params=statement_params,
496
+ statement_params,
484
497
  )
485
498
 
486
499
  if "|" in sproc_export_file_name:
@@ -490,7 +503,7 @@ class LarsCV(BaseTransformer):
490
503
  print("\n".join(fields[1:]))
491
504
 
492
505
  session.file.get(
493
- os.path.join(stage_result_file_name, sproc_export_file_name),
506
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
494
507
  local_result_file_name,
495
508
  statement_params=statement_params
496
509
  )
@@ -536,7 +549,7 @@ class LarsCV(BaseTransformer):
536
549
 
537
550
  # Register vectorized UDF for batch inference
538
551
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
539
- safe_id=self.id, method=inference_method)
552
+ safe_id=self._get_rand_id(), method=inference_method)
540
553
 
541
554
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
542
555
  # will try to pickle all of self which fails.
@@ -628,7 +641,7 @@ class LarsCV(BaseTransformer):
628
641
  return transformed_pandas_df.to_dict("records")
629
642
 
630
643
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
631
- safe_id=self.id
644
+ safe_id=self._get_rand_id()
632
645
  )
633
646
 
634
647
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -795,11 +808,18 @@ class LarsCV(BaseTransformer):
795
808
  Transformed dataset.
796
809
  """
797
810
  if isinstance(dataset, DataFrame):
811
+ expected_type_inferred = "float"
812
+ # when it is classifier, infer the datatype from label columns
813
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
814
+ expected_type_inferred = convert_sp_to_sf_type(
815
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
816
+ )
817
+
798
818
  output_df = self._batch_inference(
799
819
  dataset=dataset,
800
820
  inference_method="predict",
801
821
  expected_output_cols_list=self.output_cols,
802
- expected_output_cols_type="float",
822
+ expected_output_cols_type=expected_type_inferred,
803
823
  )
804
824
  elif isinstance(dataset, pd.DataFrame):
805
825
  output_df = self._sklearn_inference(
@@ -870,10 +890,10 @@ class LarsCV(BaseTransformer):
870
890
 
871
891
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
872
892
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
873
- Returns an empty list if current object is not a classifier or not yet fitted.
893
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
874
894
  """
875
895
  if getattr(self._sklearn_object, "classes_", None) is None:
876
- return []
896
+ return [output_cols_prefix]
877
897
 
878
898
  classes = self._sklearn_object.classes_
879
899
  if isinstance(classes, numpy.ndarray):
@@ -1098,7 +1118,7 @@ class LarsCV(BaseTransformer):
1098
1118
  cp.dump(self._sklearn_object, local_score_file)
1099
1119
 
1100
1120
  # Create temp stage to run score.
1101
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1121
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1102
1122
  session = dataset._session
1103
1123
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1104
1124
  SqlResultValidator(
@@ -1112,8 +1132,9 @@ class LarsCV(BaseTransformer):
1112
1132
  expected_value=f"Stage area {score_stage_name} successfully created."
1113
1133
  ).validate()
1114
1134
 
1115
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1116
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1135
+ # Use posixpath to construct stage paths
1136
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1137
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1117
1138
  statement_params = telemetry.get_function_usage_statement_params(
1118
1139
  project=_PROJECT,
1119
1140
  subproject=_SUBPROJECT,
@@ -1139,6 +1160,7 @@ class LarsCV(BaseTransformer):
1139
1160
  replace=True,
1140
1161
  session=session,
1141
1162
  statement_params=statement_params,
1163
+ anonymous=True
1142
1164
  )
1143
1165
  def score_wrapper_sproc(
1144
1166
  session: Session,
@@ -1146,7 +1168,8 @@ class LarsCV(BaseTransformer):
1146
1168
  stage_score_file_name: str,
1147
1169
  input_cols: List[str],
1148
1170
  label_cols: List[str],
1149
- sample_weight_col: Optional[str]
1171
+ sample_weight_col: Optional[str],
1172
+ statement_params: Dict[str, str]
1150
1173
  ) -> float:
1151
1174
  import cloudpickle as cp
1152
1175
  import numpy as np
@@ -1196,14 +1219,14 @@ class LarsCV(BaseTransformer):
1196
1219
  api_calls=[Session.call],
1197
1220
  custom_tags=dict([("autogen", True)]),
1198
1221
  )
1199
- score = session.call(
1200
- score_sproc_name,
1222
+ score = score_wrapper_sproc(
1223
+ session,
1201
1224
  query,
1202
1225
  stage_score_file_name,
1203
1226
  identifier.get_unescaped_names(self.input_cols),
1204
1227
  identifier.get_unescaped_names(self.label_cols),
1205
1228
  identifier.get_unescaped_names(self.sample_weight_col),
1206
- statement_params=statement_params,
1229
+ statement_params,
1207
1230
  )
1208
1231
 
1209
1232
  cleanup_temp_files([local_score_file_name])
@@ -1221,18 +1244,20 @@ class LarsCV(BaseTransformer):
1221
1244
  if self._sklearn_object._estimator_type == 'classifier':
1222
1245
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1223
1246
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1224
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1247
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1248
+ ([] if self._drop_input_cols else inputs) + outputs)
1225
1249
  # For regressor, the type of predict is float64
1226
1250
  elif self._sklearn_object._estimator_type == 'regressor':
1227
1251
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1228
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1229
-
1252
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1253
+ ([] if self._drop_input_cols else inputs) + outputs)
1230
1254
  for prob_func in PROB_FUNCTIONS:
1231
1255
  if hasattr(self, prob_func):
1232
1256
  output_cols_prefix: str = f"{prob_func}_"
1233
1257
  output_column_names = self._get_output_column_names(output_cols_prefix)
1234
1258
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1235
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1259
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1260
+ ([] if self._drop_input_cols else inputs) + outputs)
1236
1261
 
1237
1262
  @property
1238
1263
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -240,7 +242,6 @@ class Lasso(BaseTransformer):
240
242
  sample_weight_col: Optional[str] = None,
241
243
  ) -> None:
242
244
  super().__init__()
243
- self.id = str(uuid4()).replace("-", "_").upper()
244
245
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
245
246
 
246
247
  self._deps = list(deps)
@@ -269,6 +270,15 @@ class Lasso(BaseTransformer):
269
270
  self.set_drop_input_cols(drop_input_cols)
270
271
  self.set_sample_weight_col(sample_weight_col)
271
272
 
273
+ def _get_rand_id(self) -> str:
274
+ """
275
+ Generate random id to be used in sproc and stage names.
276
+
277
+ Returns:
278
+ Random id string usable in sproc, table, and stage names.
279
+ """
280
+ return str(uuid4()).replace("-", "_").upper()
281
+
272
282
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
273
283
  """
274
284
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -347,7 +357,7 @@ class Lasso(BaseTransformer):
347
357
  cp.dump(self._sklearn_object, local_transform_file)
348
358
 
349
359
  # Create temp stage to run fit.
350
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
360
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
351
361
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
352
362
  SqlResultValidator(
353
363
  session=session,
@@ -360,11 +370,12 @@ class Lasso(BaseTransformer):
360
370
  expected_value=f"Stage area {transform_stage_name} successfully created."
361
371
  ).validate()
362
372
 
363
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
373
+ # Use posixpath to construct stage paths
374
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
375
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
364
376
  local_result_file_name = get_temp_file_path()
365
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
366
377
 
367
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
378
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
368
379
  statement_params = telemetry.get_function_usage_statement_params(
369
380
  project=_PROJECT,
370
381
  subproject=_SUBPROJECT,
@@ -390,6 +401,7 @@ class Lasso(BaseTransformer):
390
401
  replace=True,
391
402
  session=session,
392
403
  statement_params=statement_params,
404
+ anonymous=True
393
405
  )
394
406
  def fit_wrapper_sproc(
395
407
  session: Session,
@@ -398,7 +410,8 @@ class Lasso(BaseTransformer):
398
410
  stage_result_file_name: str,
399
411
  input_cols: List[str],
400
412
  label_cols: List[str],
401
- sample_weight_col: Optional[str]
413
+ sample_weight_col: Optional[str],
414
+ statement_params: Dict[str, str]
402
415
  ) -> str:
403
416
  import cloudpickle as cp
404
417
  import numpy as np
@@ -465,15 +478,15 @@ class Lasso(BaseTransformer):
465
478
  api_calls=[Session.call],
466
479
  custom_tags=dict([("autogen", True)]),
467
480
  )
468
- sproc_export_file_name = session.call(
469
- fit_sproc_name,
481
+ sproc_export_file_name = fit_wrapper_sproc(
482
+ session,
470
483
  query,
471
484
  stage_transform_file_name,
472
485
  stage_result_file_name,
473
486
  identifier.get_unescaped_names(self.input_cols),
474
487
  identifier.get_unescaped_names(self.label_cols),
475
488
  identifier.get_unescaped_names(self.sample_weight_col),
476
- statement_params=statement_params,
489
+ statement_params,
477
490
  )
478
491
 
479
492
  if "|" in sproc_export_file_name:
@@ -483,7 +496,7 @@ class Lasso(BaseTransformer):
483
496
  print("\n".join(fields[1:]))
484
497
 
485
498
  session.file.get(
486
- os.path.join(stage_result_file_name, sproc_export_file_name),
499
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
487
500
  local_result_file_name,
488
501
  statement_params=statement_params
489
502
  )
@@ -529,7 +542,7 @@ class Lasso(BaseTransformer):
529
542
 
530
543
  # Register vectorized UDF for batch inference
531
544
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
532
- safe_id=self.id, method=inference_method)
545
+ safe_id=self._get_rand_id(), method=inference_method)
533
546
 
534
547
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
535
548
  # will try to pickle all of self which fails.
@@ -621,7 +634,7 @@ class Lasso(BaseTransformer):
621
634
  return transformed_pandas_df.to_dict("records")
622
635
 
623
636
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
624
- safe_id=self.id
637
+ safe_id=self._get_rand_id()
625
638
  )
626
639
 
627
640
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -788,11 +801,18 @@ class Lasso(BaseTransformer):
788
801
  Transformed dataset.
789
802
  """
790
803
  if isinstance(dataset, DataFrame):
804
+ expected_type_inferred = "float"
805
+ # when it is classifier, infer the datatype from label columns
806
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
807
+ expected_type_inferred = convert_sp_to_sf_type(
808
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
809
+ )
810
+
791
811
  output_df = self._batch_inference(
792
812
  dataset=dataset,
793
813
  inference_method="predict",
794
814
  expected_output_cols_list=self.output_cols,
795
- expected_output_cols_type="float",
815
+ expected_output_cols_type=expected_type_inferred,
796
816
  )
797
817
  elif isinstance(dataset, pd.DataFrame):
798
818
  output_df = self._sklearn_inference(
@@ -863,10 +883,10 @@ class Lasso(BaseTransformer):
863
883
 
864
884
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
865
885
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
866
- Returns an empty list if current object is not a classifier or not yet fitted.
886
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
867
887
  """
868
888
  if getattr(self._sklearn_object, "classes_", None) is None:
869
- return []
889
+ return [output_cols_prefix]
870
890
 
871
891
  classes = self._sklearn_object.classes_
872
892
  if isinstance(classes, numpy.ndarray):
@@ -1091,7 +1111,7 @@ class Lasso(BaseTransformer):
1091
1111
  cp.dump(self._sklearn_object, local_score_file)
1092
1112
 
1093
1113
  # Create temp stage to run score.
1094
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1114
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1095
1115
  session = dataset._session
1096
1116
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1097
1117
  SqlResultValidator(
@@ -1105,8 +1125,9 @@ class Lasso(BaseTransformer):
1105
1125
  expected_value=f"Stage area {score_stage_name} successfully created."
1106
1126
  ).validate()
1107
1127
 
1108
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1109
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1128
+ # Use posixpath to construct stage paths
1129
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1130
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1110
1131
  statement_params = telemetry.get_function_usage_statement_params(
1111
1132
  project=_PROJECT,
1112
1133
  subproject=_SUBPROJECT,
@@ -1132,6 +1153,7 @@ class Lasso(BaseTransformer):
1132
1153
  replace=True,
1133
1154
  session=session,
1134
1155
  statement_params=statement_params,
1156
+ anonymous=True
1135
1157
  )
1136
1158
  def score_wrapper_sproc(
1137
1159
  session: Session,
@@ -1139,7 +1161,8 @@ class Lasso(BaseTransformer):
1139
1161
  stage_score_file_name: str,
1140
1162
  input_cols: List[str],
1141
1163
  label_cols: List[str],
1142
- sample_weight_col: Optional[str]
1164
+ sample_weight_col: Optional[str],
1165
+ statement_params: Dict[str, str]
1143
1166
  ) -> float:
1144
1167
  import cloudpickle as cp
1145
1168
  import numpy as np
@@ -1189,14 +1212,14 @@ class Lasso(BaseTransformer):
1189
1212
  api_calls=[Session.call],
1190
1213
  custom_tags=dict([("autogen", True)]),
1191
1214
  )
1192
- score = session.call(
1193
- score_sproc_name,
1215
+ score = score_wrapper_sproc(
1216
+ session,
1194
1217
  query,
1195
1218
  stage_score_file_name,
1196
1219
  identifier.get_unescaped_names(self.input_cols),
1197
1220
  identifier.get_unescaped_names(self.label_cols),
1198
1221
  identifier.get_unescaped_names(self.sample_weight_col),
1199
- statement_params=statement_params,
1222
+ statement_params,
1200
1223
  )
1201
1224
 
1202
1225
  cleanup_temp_files([local_score_file_name])
@@ -1214,18 +1237,20 @@ class Lasso(BaseTransformer):
1214
1237
  if self._sklearn_object._estimator_type == 'classifier':
1215
1238
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1216
1239
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1217
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1240
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1241
+ ([] if self._drop_input_cols else inputs) + outputs)
1218
1242
  # For regressor, the type of predict is float64
1219
1243
  elif self._sklearn_object._estimator_type == 'regressor':
1220
1244
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1221
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1222
-
1245
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1246
+ ([] if self._drop_input_cols else inputs) + outputs)
1223
1247
  for prob_func in PROB_FUNCTIONS:
1224
1248
  if hasattr(self, prob_func):
1225
1249
  output_cols_prefix: str = f"{prob_func}_"
1226
1250
  output_column_names = self._get_output_column_names(output_cols_prefix)
1227
1251
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1228
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1252
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1253
+ ([] if self._drop_input_cols else inputs) + outputs)
1229
1254
 
1230
1255
  @property
1231
1256
  def model_signatures(self) -> Dict[str, ModelSignature]: