snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -268,7 +270,6 @@ class MultiTaskElasticNetCV(BaseTransformer):
268
270
  sample_weight_col: Optional[str] = None,
269
271
  ) -> None:
270
272
  super().__init__()
271
- self.id = str(uuid4()).replace("-", "_").upper()
272
273
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
273
274
 
274
275
  self._deps = list(deps)
@@ -300,6 +301,15 @@ class MultiTaskElasticNetCV(BaseTransformer):
300
301
  self.set_drop_input_cols(drop_input_cols)
301
302
  self.set_sample_weight_col(sample_weight_col)
302
303
 
304
+ def _get_rand_id(self) -> str:
305
+ """
306
+ Generate random id to be used in sproc and stage names.
307
+
308
+ Returns:
309
+ Random id string usable in sproc, table, and stage names.
310
+ """
311
+ return str(uuid4()).replace("-", "_").upper()
312
+
303
313
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
304
314
  """
305
315
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -378,7 +388,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
378
388
  cp.dump(self._sklearn_object, local_transform_file)
379
389
 
380
390
  # Create temp stage to run fit.
381
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
391
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
382
392
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
383
393
  SqlResultValidator(
384
394
  session=session,
@@ -391,11 +401,12 @@ class MultiTaskElasticNetCV(BaseTransformer):
391
401
  expected_value=f"Stage area {transform_stage_name} successfully created."
392
402
  ).validate()
393
403
 
394
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
404
+ # Use posixpath to construct stage paths
405
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
406
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
395
407
  local_result_file_name = get_temp_file_path()
396
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
397
408
 
398
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
409
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
399
410
  statement_params = telemetry.get_function_usage_statement_params(
400
411
  project=_PROJECT,
401
412
  subproject=_SUBPROJECT,
@@ -421,6 +432,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
421
432
  replace=True,
422
433
  session=session,
423
434
  statement_params=statement_params,
435
+ anonymous=True
424
436
  )
425
437
  def fit_wrapper_sproc(
426
438
  session: Session,
@@ -429,7 +441,8 @@ class MultiTaskElasticNetCV(BaseTransformer):
429
441
  stage_result_file_name: str,
430
442
  input_cols: List[str],
431
443
  label_cols: List[str],
432
- sample_weight_col: Optional[str]
444
+ sample_weight_col: Optional[str],
445
+ statement_params: Dict[str, str]
433
446
  ) -> str:
434
447
  import cloudpickle as cp
435
448
  import numpy as np
@@ -496,15 +509,15 @@ class MultiTaskElasticNetCV(BaseTransformer):
496
509
  api_calls=[Session.call],
497
510
  custom_tags=dict([("autogen", True)]),
498
511
  )
499
- sproc_export_file_name = session.call(
500
- fit_sproc_name,
512
+ sproc_export_file_name = fit_wrapper_sproc(
513
+ session,
501
514
  query,
502
515
  stage_transform_file_name,
503
516
  stage_result_file_name,
504
517
  identifier.get_unescaped_names(self.input_cols),
505
518
  identifier.get_unescaped_names(self.label_cols),
506
519
  identifier.get_unescaped_names(self.sample_weight_col),
507
- statement_params=statement_params,
520
+ statement_params,
508
521
  )
509
522
 
510
523
  if "|" in sproc_export_file_name:
@@ -514,7 +527,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
514
527
  print("\n".join(fields[1:]))
515
528
 
516
529
  session.file.get(
517
- os.path.join(stage_result_file_name, sproc_export_file_name),
530
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
518
531
  local_result_file_name,
519
532
  statement_params=statement_params
520
533
  )
@@ -560,7 +573,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
560
573
 
561
574
  # Register vectorized UDF for batch inference
562
575
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
563
- safe_id=self.id, method=inference_method)
576
+ safe_id=self._get_rand_id(), method=inference_method)
564
577
 
565
578
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
566
579
  # will try to pickle all of self which fails.
@@ -652,7 +665,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
652
665
  return transformed_pandas_df.to_dict("records")
653
666
 
654
667
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
655
- safe_id=self.id
668
+ safe_id=self._get_rand_id()
656
669
  )
657
670
 
658
671
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -819,11 +832,18 @@ class MultiTaskElasticNetCV(BaseTransformer):
819
832
  Transformed dataset.
820
833
  """
821
834
  if isinstance(dataset, DataFrame):
835
+ expected_type_inferred = "float"
836
+ # when it is classifier, infer the datatype from label columns
837
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
838
+ expected_type_inferred = convert_sp_to_sf_type(
839
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
840
+ )
841
+
822
842
  output_df = self._batch_inference(
823
843
  dataset=dataset,
824
844
  inference_method="predict",
825
845
  expected_output_cols_list=self.output_cols,
826
- expected_output_cols_type="float",
846
+ expected_output_cols_type=expected_type_inferred,
827
847
  )
828
848
  elif isinstance(dataset, pd.DataFrame):
829
849
  output_df = self._sklearn_inference(
@@ -894,10 +914,10 @@ class MultiTaskElasticNetCV(BaseTransformer):
894
914
 
895
915
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
896
916
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
897
- Returns an empty list if current object is not a classifier or not yet fitted.
917
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
898
918
  """
899
919
  if getattr(self._sklearn_object, "classes_", None) is None:
900
- return []
920
+ return [output_cols_prefix]
901
921
 
902
922
  classes = self._sklearn_object.classes_
903
923
  if isinstance(classes, numpy.ndarray):
@@ -1122,7 +1142,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
1122
1142
  cp.dump(self._sklearn_object, local_score_file)
1123
1143
 
1124
1144
  # Create temp stage to run score.
1125
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1145
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1126
1146
  session = dataset._session
1127
1147
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1128
1148
  SqlResultValidator(
@@ -1136,8 +1156,9 @@ class MultiTaskElasticNetCV(BaseTransformer):
1136
1156
  expected_value=f"Stage area {score_stage_name} successfully created."
1137
1157
  ).validate()
1138
1158
 
1139
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1140
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1159
+ # Use posixpath to construct stage paths
1160
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1161
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1141
1162
  statement_params = telemetry.get_function_usage_statement_params(
1142
1163
  project=_PROJECT,
1143
1164
  subproject=_SUBPROJECT,
@@ -1163,6 +1184,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
1163
1184
  replace=True,
1164
1185
  session=session,
1165
1186
  statement_params=statement_params,
1187
+ anonymous=True
1166
1188
  )
1167
1189
  def score_wrapper_sproc(
1168
1190
  session: Session,
@@ -1170,7 +1192,8 @@ class MultiTaskElasticNetCV(BaseTransformer):
1170
1192
  stage_score_file_name: str,
1171
1193
  input_cols: List[str],
1172
1194
  label_cols: List[str],
1173
- sample_weight_col: Optional[str]
1195
+ sample_weight_col: Optional[str],
1196
+ statement_params: Dict[str, str]
1174
1197
  ) -> float:
1175
1198
  import cloudpickle as cp
1176
1199
  import numpy as np
@@ -1220,14 +1243,14 @@ class MultiTaskElasticNetCV(BaseTransformer):
1220
1243
  api_calls=[Session.call],
1221
1244
  custom_tags=dict([("autogen", True)]),
1222
1245
  )
1223
- score = session.call(
1224
- score_sproc_name,
1246
+ score = score_wrapper_sproc(
1247
+ session,
1225
1248
  query,
1226
1249
  stage_score_file_name,
1227
1250
  identifier.get_unescaped_names(self.input_cols),
1228
1251
  identifier.get_unescaped_names(self.label_cols),
1229
1252
  identifier.get_unescaped_names(self.sample_weight_col),
1230
- statement_params=statement_params,
1253
+ statement_params,
1231
1254
  )
1232
1255
 
1233
1256
  cleanup_temp_files([local_score_file_name])
@@ -1245,18 +1268,20 @@ class MultiTaskElasticNetCV(BaseTransformer):
1245
1268
  if self._sklearn_object._estimator_type == 'classifier':
1246
1269
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1247
1270
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1248
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1271
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1272
+ ([] if self._drop_input_cols else inputs) + outputs)
1249
1273
  # For regressor, the type of predict is float64
1250
1274
  elif self._sklearn_object._estimator_type == 'regressor':
1251
1275
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1252
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1253
-
1276
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1277
+ ([] if self._drop_input_cols else inputs) + outputs)
1254
1278
  for prob_func in PROB_FUNCTIONS:
1255
1279
  if hasattr(self, prob_func):
1256
1280
  output_cols_prefix: str = f"{prob_func}_"
1257
1281
  output_column_names = self._get_output_column_names(output_cols_prefix)
1258
1282
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1259
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1283
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1284
+ ([] if self._drop_input_cols else inputs) + outputs)
1260
1285
 
1261
1286
  @property
1262
1287
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -224,7 +226,6 @@ class MultiTaskLasso(BaseTransformer):
224
226
  sample_weight_col: Optional[str] = None,
225
227
  ) -> None:
226
228
  super().__init__()
227
- self.id = str(uuid4()).replace("-", "_").upper()
228
229
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
229
230
 
230
231
  self._deps = list(deps)
@@ -251,6 +252,15 @@ class MultiTaskLasso(BaseTransformer):
251
252
  self.set_drop_input_cols(drop_input_cols)
252
253
  self.set_sample_weight_col(sample_weight_col)
253
254
 
255
+ def _get_rand_id(self) -> str:
256
+ """
257
+ Generate random id to be used in sproc and stage names.
258
+
259
+ Returns:
260
+ Random id string usable in sproc, table, and stage names.
261
+ """
262
+ return str(uuid4()).replace("-", "_").upper()
263
+
254
264
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
255
265
  """
256
266
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -329,7 +339,7 @@ class MultiTaskLasso(BaseTransformer):
329
339
  cp.dump(self._sklearn_object, local_transform_file)
330
340
 
331
341
  # Create temp stage to run fit.
332
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
342
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
333
343
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
334
344
  SqlResultValidator(
335
345
  session=session,
@@ -342,11 +352,12 @@ class MultiTaskLasso(BaseTransformer):
342
352
  expected_value=f"Stage area {transform_stage_name} successfully created."
343
353
  ).validate()
344
354
 
345
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
355
+ # Use posixpath to construct stage paths
356
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
357
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
346
358
  local_result_file_name = get_temp_file_path()
347
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
348
359
 
349
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
360
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
350
361
  statement_params = telemetry.get_function_usage_statement_params(
351
362
  project=_PROJECT,
352
363
  subproject=_SUBPROJECT,
@@ -372,6 +383,7 @@ class MultiTaskLasso(BaseTransformer):
372
383
  replace=True,
373
384
  session=session,
374
385
  statement_params=statement_params,
386
+ anonymous=True
375
387
  )
376
388
  def fit_wrapper_sproc(
377
389
  session: Session,
@@ -380,7 +392,8 @@ class MultiTaskLasso(BaseTransformer):
380
392
  stage_result_file_name: str,
381
393
  input_cols: List[str],
382
394
  label_cols: List[str],
383
- sample_weight_col: Optional[str]
395
+ sample_weight_col: Optional[str],
396
+ statement_params: Dict[str, str]
384
397
  ) -> str:
385
398
  import cloudpickle as cp
386
399
  import numpy as np
@@ -447,15 +460,15 @@ class MultiTaskLasso(BaseTransformer):
447
460
  api_calls=[Session.call],
448
461
  custom_tags=dict([("autogen", True)]),
449
462
  )
450
- sproc_export_file_name = session.call(
451
- fit_sproc_name,
463
+ sproc_export_file_name = fit_wrapper_sproc(
464
+ session,
452
465
  query,
453
466
  stage_transform_file_name,
454
467
  stage_result_file_name,
455
468
  identifier.get_unescaped_names(self.input_cols),
456
469
  identifier.get_unescaped_names(self.label_cols),
457
470
  identifier.get_unescaped_names(self.sample_weight_col),
458
- statement_params=statement_params,
471
+ statement_params,
459
472
  )
460
473
 
461
474
  if "|" in sproc_export_file_name:
@@ -465,7 +478,7 @@ class MultiTaskLasso(BaseTransformer):
465
478
  print("\n".join(fields[1:]))
466
479
 
467
480
  session.file.get(
468
- os.path.join(stage_result_file_name, sproc_export_file_name),
481
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
469
482
  local_result_file_name,
470
483
  statement_params=statement_params
471
484
  )
@@ -511,7 +524,7 @@ class MultiTaskLasso(BaseTransformer):
511
524
 
512
525
  # Register vectorized UDF for batch inference
513
526
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
514
- safe_id=self.id, method=inference_method)
527
+ safe_id=self._get_rand_id(), method=inference_method)
515
528
 
516
529
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
517
530
  # will try to pickle all of self which fails.
@@ -603,7 +616,7 @@ class MultiTaskLasso(BaseTransformer):
603
616
  return transformed_pandas_df.to_dict("records")
604
617
 
605
618
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
606
- safe_id=self.id
619
+ safe_id=self._get_rand_id()
607
620
  )
608
621
 
609
622
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -770,11 +783,18 @@ class MultiTaskLasso(BaseTransformer):
770
783
  Transformed dataset.
771
784
  """
772
785
  if isinstance(dataset, DataFrame):
786
+ expected_type_inferred = "float"
787
+ # when it is classifier, infer the datatype from label columns
788
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
789
+ expected_type_inferred = convert_sp_to_sf_type(
790
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
791
+ )
792
+
773
793
  output_df = self._batch_inference(
774
794
  dataset=dataset,
775
795
  inference_method="predict",
776
796
  expected_output_cols_list=self.output_cols,
777
- expected_output_cols_type="float",
797
+ expected_output_cols_type=expected_type_inferred,
778
798
  )
779
799
  elif isinstance(dataset, pd.DataFrame):
780
800
  output_df = self._sklearn_inference(
@@ -845,10 +865,10 @@ class MultiTaskLasso(BaseTransformer):
845
865
 
846
866
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
847
867
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
848
- Returns an empty list if current object is not a classifier or not yet fitted.
868
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
849
869
  """
850
870
  if getattr(self._sklearn_object, "classes_", None) is None:
851
- return []
871
+ return [output_cols_prefix]
852
872
 
853
873
  classes = self._sklearn_object.classes_
854
874
  if isinstance(classes, numpy.ndarray):
@@ -1073,7 +1093,7 @@ class MultiTaskLasso(BaseTransformer):
1073
1093
  cp.dump(self._sklearn_object, local_score_file)
1074
1094
 
1075
1095
  # Create temp stage to run score.
1076
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1096
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1077
1097
  session = dataset._session
1078
1098
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1079
1099
  SqlResultValidator(
@@ -1087,8 +1107,9 @@ class MultiTaskLasso(BaseTransformer):
1087
1107
  expected_value=f"Stage area {score_stage_name} successfully created."
1088
1108
  ).validate()
1089
1109
 
1090
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1091
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1110
+ # Use posixpath to construct stage paths
1111
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1112
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1092
1113
  statement_params = telemetry.get_function_usage_statement_params(
1093
1114
  project=_PROJECT,
1094
1115
  subproject=_SUBPROJECT,
@@ -1114,6 +1135,7 @@ class MultiTaskLasso(BaseTransformer):
1114
1135
  replace=True,
1115
1136
  session=session,
1116
1137
  statement_params=statement_params,
1138
+ anonymous=True
1117
1139
  )
1118
1140
  def score_wrapper_sproc(
1119
1141
  session: Session,
@@ -1121,7 +1143,8 @@ class MultiTaskLasso(BaseTransformer):
1121
1143
  stage_score_file_name: str,
1122
1144
  input_cols: List[str],
1123
1145
  label_cols: List[str],
1124
- sample_weight_col: Optional[str]
1146
+ sample_weight_col: Optional[str],
1147
+ statement_params: Dict[str, str]
1125
1148
  ) -> float:
1126
1149
  import cloudpickle as cp
1127
1150
  import numpy as np
@@ -1171,14 +1194,14 @@ class MultiTaskLasso(BaseTransformer):
1171
1194
  api_calls=[Session.call],
1172
1195
  custom_tags=dict([("autogen", True)]),
1173
1196
  )
1174
- score = session.call(
1175
- score_sproc_name,
1197
+ score = score_wrapper_sproc(
1198
+ session,
1176
1199
  query,
1177
1200
  stage_score_file_name,
1178
1201
  identifier.get_unescaped_names(self.input_cols),
1179
1202
  identifier.get_unescaped_names(self.label_cols),
1180
1203
  identifier.get_unescaped_names(self.sample_weight_col),
1181
- statement_params=statement_params,
1204
+ statement_params,
1182
1205
  )
1183
1206
 
1184
1207
  cleanup_temp_files([local_score_file_name])
@@ -1196,18 +1219,20 @@ class MultiTaskLasso(BaseTransformer):
1196
1219
  if self._sklearn_object._estimator_type == 'classifier':
1197
1220
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1198
1221
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1199
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1222
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1223
+ ([] if self._drop_input_cols else inputs) + outputs)
1200
1224
  # For regressor, the type of predict is float64
1201
1225
  elif self._sklearn_object._estimator_type == 'regressor':
1202
1226
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1203
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1204
-
1227
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1228
+ ([] if self._drop_input_cols else inputs) + outputs)
1205
1229
  for prob_func in PROB_FUNCTIONS:
1206
1230
  if hasattr(self, prob_func):
1207
1231
  output_cols_prefix: str = f"{prob_func}_"
1208
1232
  output_column_names = self._get_output_column_names(output_cols_prefix)
1209
1233
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1210
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1234
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1235
+ ([] if self._drop_input_cols else inputs) + outputs)
1211
1236
 
1212
1237
  @property
1213
1238
  def model_signatures(self) -> Dict[str, ModelSignature]: