snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -325,7 +327,6 @@ class LogisticRegressionCV(BaseTransformer):
325
327
  sample_weight_col: Optional[str] = None,
326
328
  ) -> None:
327
329
  super().__init__()
328
- self.id = str(uuid4()).replace("-", "_").upper()
329
330
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
330
331
 
331
332
  self._deps = list(deps)
@@ -361,6 +362,15 @@ class LogisticRegressionCV(BaseTransformer):
361
362
  self.set_drop_input_cols(drop_input_cols)
362
363
  self.set_sample_weight_col(sample_weight_col)
363
364
 
365
+ def _get_rand_id(self) -> str:
366
+ """
367
+ Generate random id to be used in sproc and stage names.
368
+
369
+ Returns:
370
+ Random id string usable in sproc, table, and stage names.
371
+ """
372
+ return str(uuid4()).replace("-", "_").upper()
373
+
364
374
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
365
375
  """
366
376
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -439,7 +449,7 @@ class LogisticRegressionCV(BaseTransformer):
439
449
  cp.dump(self._sklearn_object, local_transform_file)
440
450
 
441
451
  # Create temp stage to run fit.
442
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
452
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
443
453
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
444
454
  SqlResultValidator(
445
455
  session=session,
@@ -452,11 +462,12 @@ class LogisticRegressionCV(BaseTransformer):
452
462
  expected_value=f"Stage area {transform_stage_name} successfully created."
453
463
  ).validate()
454
464
 
455
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
465
+ # Use posixpath to construct stage paths
466
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
467
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
456
468
  local_result_file_name = get_temp_file_path()
457
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
458
469
 
459
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
470
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
460
471
  statement_params = telemetry.get_function_usage_statement_params(
461
472
  project=_PROJECT,
462
473
  subproject=_SUBPROJECT,
@@ -482,6 +493,7 @@ class LogisticRegressionCV(BaseTransformer):
482
493
  replace=True,
483
494
  session=session,
484
495
  statement_params=statement_params,
496
+ anonymous=True
485
497
  )
486
498
  def fit_wrapper_sproc(
487
499
  session: Session,
@@ -490,7 +502,8 @@ class LogisticRegressionCV(BaseTransformer):
490
502
  stage_result_file_name: str,
491
503
  input_cols: List[str],
492
504
  label_cols: List[str],
493
- sample_weight_col: Optional[str]
505
+ sample_weight_col: Optional[str],
506
+ statement_params: Dict[str, str]
494
507
  ) -> str:
495
508
  import cloudpickle as cp
496
509
  import numpy as np
@@ -557,15 +570,15 @@ class LogisticRegressionCV(BaseTransformer):
557
570
  api_calls=[Session.call],
558
571
  custom_tags=dict([("autogen", True)]),
559
572
  )
560
- sproc_export_file_name = session.call(
561
- fit_sproc_name,
573
+ sproc_export_file_name = fit_wrapper_sproc(
574
+ session,
562
575
  query,
563
576
  stage_transform_file_name,
564
577
  stage_result_file_name,
565
578
  identifier.get_unescaped_names(self.input_cols),
566
579
  identifier.get_unescaped_names(self.label_cols),
567
580
  identifier.get_unescaped_names(self.sample_weight_col),
568
- statement_params=statement_params,
581
+ statement_params,
569
582
  )
570
583
 
571
584
  if "|" in sproc_export_file_name:
@@ -575,7 +588,7 @@ class LogisticRegressionCV(BaseTransformer):
575
588
  print("\n".join(fields[1:]))
576
589
 
577
590
  session.file.get(
578
- os.path.join(stage_result_file_name, sproc_export_file_name),
591
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
579
592
  local_result_file_name,
580
593
  statement_params=statement_params
581
594
  )
@@ -621,7 +634,7 @@ class LogisticRegressionCV(BaseTransformer):
621
634
 
622
635
  # Register vectorized UDF for batch inference
623
636
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
624
- safe_id=self.id, method=inference_method)
637
+ safe_id=self._get_rand_id(), method=inference_method)
625
638
 
626
639
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
627
640
  # will try to pickle all of self which fails.
@@ -713,7 +726,7 @@ class LogisticRegressionCV(BaseTransformer):
713
726
  return transformed_pandas_df.to_dict("records")
714
727
 
715
728
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
716
- safe_id=self.id
729
+ safe_id=self._get_rand_id()
717
730
  )
718
731
 
719
732
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -880,11 +893,18 @@ class LogisticRegressionCV(BaseTransformer):
880
893
  Transformed dataset.
881
894
  """
882
895
  if isinstance(dataset, DataFrame):
896
+ expected_type_inferred = ""
897
+ # when it is classifier, infer the datatype from label columns
898
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
899
+ expected_type_inferred = convert_sp_to_sf_type(
900
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
901
+ )
902
+
883
903
  output_df = self._batch_inference(
884
904
  dataset=dataset,
885
905
  inference_method="predict",
886
906
  expected_output_cols_list=self.output_cols,
887
- expected_output_cols_type="",
907
+ expected_output_cols_type=expected_type_inferred,
888
908
  )
889
909
  elif isinstance(dataset, pd.DataFrame):
890
910
  output_df = self._sklearn_inference(
@@ -955,10 +975,10 @@ class LogisticRegressionCV(BaseTransformer):
955
975
 
956
976
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
957
977
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
958
- Returns an empty list if current object is not a classifier or not yet fitted.
978
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
959
979
  """
960
980
  if getattr(self._sklearn_object, "classes_", None) is None:
961
- return []
981
+ return [output_cols_prefix]
962
982
 
963
983
  classes = self._sklearn_object.classes_
964
984
  if isinstance(classes, numpy.ndarray):
@@ -1189,7 +1209,7 @@ class LogisticRegressionCV(BaseTransformer):
1189
1209
  cp.dump(self._sklearn_object, local_score_file)
1190
1210
 
1191
1211
  # Create temp stage to run score.
1192
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1212
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1193
1213
  session = dataset._session
1194
1214
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1195
1215
  SqlResultValidator(
@@ -1203,8 +1223,9 @@ class LogisticRegressionCV(BaseTransformer):
1203
1223
  expected_value=f"Stage area {score_stage_name} successfully created."
1204
1224
  ).validate()
1205
1225
 
1206
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1207
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1226
+ # Use posixpath to construct stage paths
1227
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1228
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1208
1229
  statement_params = telemetry.get_function_usage_statement_params(
1209
1230
  project=_PROJECT,
1210
1231
  subproject=_SUBPROJECT,
@@ -1230,6 +1251,7 @@ class LogisticRegressionCV(BaseTransformer):
1230
1251
  replace=True,
1231
1252
  session=session,
1232
1253
  statement_params=statement_params,
1254
+ anonymous=True
1233
1255
  )
1234
1256
  def score_wrapper_sproc(
1235
1257
  session: Session,
@@ -1237,7 +1259,8 @@ class LogisticRegressionCV(BaseTransformer):
1237
1259
  stage_score_file_name: str,
1238
1260
  input_cols: List[str],
1239
1261
  label_cols: List[str],
1240
- sample_weight_col: Optional[str]
1262
+ sample_weight_col: Optional[str],
1263
+ statement_params: Dict[str, str]
1241
1264
  ) -> float:
1242
1265
  import cloudpickle as cp
1243
1266
  import numpy as np
@@ -1287,14 +1310,14 @@ class LogisticRegressionCV(BaseTransformer):
1287
1310
  api_calls=[Session.call],
1288
1311
  custom_tags=dict([("autogen", True)]),
1289
1312
  )
1290
- score = session.call(
1291
- score_sproc_name,
1313
+ score = score_wrapper_sproc(
1314
+ session,
1292
1315
  query,
1293
1316
  stage_score_file_name,
1294
1317
  identifier.get_unescaped_names(self.input_cols),
1295
1318
  identifier.get_unescaped_names(self.label_cols),
1296
1319
  identifier.get_unescaped_names(self.sample_weight_col),
1297
- statement_params=statement_params,
1320
+ statement_params,
1298
1321
  )
1299
1322
 
1300
1323
  cleanup_temp_files([local_score_file_name])
@@ -1312,18 +1335,20 @@ class LogisticRegressionCV(BaseTransformer):
1312
1335
  if self._sklearn_object._estimator_type == 'classifier':
1313
1336
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1314
1337
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1315
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1338
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1339
+ ([] if self._drop_input_cols else inputs) + outputs)
1316
1340
  # For regressor, the type of predict is float64
1317
1341
  elif self._sklearn_object._estimator_type == 'regressor':
1318
1342
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1319
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1320
-
1343
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1344
+ ([] if self._drop_input_cols else inputs) + outputs)
1321
1345
  for prob_func in PROB_FUNCTIONS:
1322
1346
  if hasattr(self, prob_func):
1323
1347
  output_cols_prefix: str = f"{prob_func}_"
1324
1348
  output_column_names = self._get_output_column_names(output_cols_prefix)
1325
1349
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1326
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1350
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1351
+ ([] if self._drop_input_cols else inputs) + outputs)
1327
1352
 
1328
1353
  @property
1329
1354
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -231,7 +233,6 @@ class MultiTaskElasticNet(BaseTransformer):
231
233
  sample_weight_col: Optional[str] = None,
232
234
  ) -> None:
233
235
  super().__init__()
234
- self.id = str(uuid4()).replace("-", "_").upper()
235
236
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
236
237
 
237
238
  self._deps = list(deps)
@@ -259,6 +260,15 @@ class MultiTaskElasticNet(BaseTransformer):
259
260
  self.set_drop_input_cols(drop_input_cols)
260
261
  self.set_sample_weight_col(sample_weight_col)
261
262
 
263
+ def _get_rand_id(self) -> str:
264
+ """
265
+ Generate random id to be used in sproc and stage names.
266
+
267
+ Returns:
268
+ Random id string usable in sproc, table, and stage names.
269
+ """
270
+ return str(uuid4()).replace("-", "_").upper()
271
+
262
272
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
263
273
  """
264
274
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -337,7 +347,7 @@ class MultiTaskElasticNet(BaseTransformer):
337
347
  cp.dump(self._sklearn_object, local_transform_file)
338
348
 
339
349
  # Create temp stage to run fit.
340
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
350
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
341
351
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
342
352
  SqlResultValidator(
343
353
  session=session,
@@ -350,11 +360,12 @@ class MultiTaskElasticNet(BaseTransformer):
350
360
  expected_value=f"Stage area {transform_stage_name} successfully created."
351
361
  ).validate()
352
362
 
353
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
363
+ # Use posixpath to construct stage paths
364
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
365
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
354
366
  local_result_file_name = get_temp_file_path()
355
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
356
367
 
357
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
368
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
358
369
  statement_params = telemetry.get_function_usage_statement_params(
359
370
  project=_PROJECT,
360
371
  subproject=_SUBPROJECT,
@@ -380,6 +391,7 @@ class MultiTaskElasticNet(BaseTransformer):
380
391
  replace=True,
381
392
  session=session,
382
393
  statement_params=statement_params,
394
+ anonymous=True
383
395
  )
384
396
  def fit_wrapper_sproc(
385
397
  session: Session,
@@ -388,7 +400,8 @@ class MultiTaskElasticNet(BaseTransformer):
388
400
  stage_result_file_name: str,
389
401
  input_cols: List[str],
390
402
  label_cols: List[str],
391
- sample_weight_col: Optional[str]
403
+ sample_weight_col: Optional[str],
404
+ statement_params: Dict[str, str]
392
405
  ) -> str:
393
406
  import cloudpickle as cp
394
407
  import numpy as np
@@ -455,15 +468,15 @@ class MultiTaskElasticNet(BaseTransformer):
455
468
  api_calls=[Session.call],
456
469
  custom_tags=dict([("autogen", True)]),
457
470
  )
458
- sproc_export_file_name = session.call(
459
- fit_sproc_name,
471
+ sproc_export_file_name = fit_wrapper_sproc(
472
+ session,
460
473
  query,
461
474
  stage_transform_file_name,
462
475
  stage_result_file_name,
463
476
  identifier.get_unescaped_names(self.input_cols),
464
477
  identifier.get_unescaped_names(self.label_cols),
465
478
  identifier.get_unescaped_names(self.sample_weight_col),
466
- statement_params=statement_params,
479
+ statement_params,
467
480
  )
468
481
 
469
482
  if "|" in sproc_export_file_name:
@@ -473,7 +486,7 @@ class MultiTaskElasticNet(BaseTransformer):
473
486
  print("\n".join(fields[1:]))
474
487
 
475
488
  session.file.get(
476
- os.path.join(stage_result_file_name, sproc_export_file_name),
489
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
477
490
  local_result_file_name,
478
491
  statement_params=statement_params
479
492
  )
@@ -519,7 +532,7 @@ class MultiTaskElasticNet(BaseTransformer):
519
532
 
520
533
  # Register vectorized UDF for batch inference
521
534
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
522
- safe_id=self.id, method=inference_method)
535
+ safe_id=self._get_rand_id(), method=inference_method)
523
536
 
524
537
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
525
538
  # will try to pickle all of self which fails.
@@ -611,7 +624,7 @@ class MultiTaskElasticNet(BaseTransformer):
611
624
  return transformed_pandas_df.to_dict("records")
612
625
 
613
626
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
614
- safe_id=self.id
627
+ safe_id=self._get_rand_id()
615
628
  )
616
629
 
617
630
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -778,11 +791,18 @@ class MultiTaskElasticNet(BaseTransformer):
778
791
  Transformed dataset.
779
792
  """
780
793
  if isinstance(dataset, DataFrame):
794
+ expected_type_inferred = "float"
795
+ # when it is classifier, infer the datatype from label columns
796
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
797
+ expected_type_inferred = convert_sp_to_sf_type(
798
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
799
+ )
800
+
781
801
  output_df = self._batch_inference(
782
802
  dataset=dataset,
783
803
  inference_method="predict",
784
804
  expected_output_cols_list=self.output_cols,
785
- expected_output_cols_type="float",
805
+ expected_output_cols_type=expected_type_inferred,
786
806
  )
787
807
  elif isinstance(dataset, pd.DataFrame):
788
808
  output_df = self._sklearn_inference(
@@ -853,10 +873,10 @@ class MultiTaskElasticNet(BaseTransformer):
853
873
 
854
874
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
855
875
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
856
- Returns an empty list if current object is not a classifier or not yet fitted.
876
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
857
877
  """
858
878
  if getattr(self._sklearn_object, "classes_", None) is None:
859
- return []
879
+ return [output_cols_prefix]
860
880
 
861
881
  classes = self._sklearn_object.classes_
862
882
  if isinstance(classes, numpy.ndarray):
@@ -1081,7 +1101,7 @@ class MultiTaskElasticNet(BaseTransformer):
1081
1101
  cp.dump(self._sklearn_object, local_score_file)
1082
1102
 
1083
1103
  # Create temp stage to run score.
1084
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1104
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1085
1105
  session = dataset._session
1086
1106
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1087
1107
  SqlResultValidator(
@@ -1095,8 +1115,9 @@ class MultiTaskElasticNet(BaseTransformer):
1095
1115
  expected_value=f"Stage area {score_stage_name} successfully created."
1096
1116
  ).validate()
1097
1117
 
1098
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1099
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1118
+ # Use posixpath to construct stage paths
1119
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1120
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1100
1121
  statement_params = telemetry.get_function_usage_statement_params(
1101
1122
  project=_PROJECT,
1102
1123
  subproject=_SUBPROJECT,
@@ -1122,6 +1143,7 @@ class MultiTaskElasticNet(BaseTransformer):
1122
1143
  replace=True,
1123
1144
  session=session,
1124
1145
  statement_params=statement_params,
1146
+ anonymous=True
1125
1147
  )
1126
1148
  def score_wrapper_sproc(
1127
1149
  session: Session,
@@ -1129,7 +1151,8 @@ class MultiTaskElasticNet(BaseTransformer):
1129
1151
  stage_score_file_name: str,
1130
1152
  input_cols: List[str],
1131
1153
  label_cols: List[str],
1132
- sample_weight_col: Optional[str]
1154
+ sample_weight_col: Optional[str],
1155
+ statement_params: Dict[str, str]
1133
1156
  ) -> float:
1134
1157
  import cloudpickle as cp
1135
1158
  import numpy as np
@@ -1179,14 +1202,14 @@ class MultiTaskElasticNet(BaseTransformer):
1179
1202
  api_calls=[Session.call],
1180
1203
  custom_tags=dict([("autogen", True)]),
1181
1204
  )
1182
- score = session.call(
1183
- score_sproc_name,
1205
+ score = score_wrapper_sproc(
1206
+ session,
1184
1207
  query,
1185
1208
  stage_score_file_name,
1186
1209
  identifier.get_unescaped_names(self.input_cols),
1187
1210
  identifier.get_unescaped_names(self.label_cols),
1188
1211
  identifier.get_unescaped_names(self.sample_weight_col),
1189
- statement_params=statement_params,
1212
+ statement_params,
1190
1213
  )
1191
1214
 
1192
1215
  cleanup_temp_files([local_score_file_name])
@@ -1204,18 +1227,20 @@ class MultiTaskElasticNet(BaseTransformer):
1204
1227
  if self._sklearn_object._estimator_type == 'classifier':
1205
1228
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1206
1229
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1207
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1230
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1231
+ ([] if self._drop_input_cols else inputs) + outputs)
1208
1232
  # For regressor, the type of predict is float64
1209
1233
  elif self._sklearn_object._estimator_type == 'regressor':
1210
1234
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1211
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1212
-
1235
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1236
+ ([] if self._drop_input_cols else inputs) + outputs)
1213
1237
  for prob_func in PROB_FUNCTIONS:
1214
1238
  if hasattr(self, prob_func):
1215
1239
  output_cols_prefix: str = f"{prob_func}_"
1216
1240
  output_column_names = self._get_output_column_names(output_cols_prefix)
1217
1241
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1218
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1242
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1243
+ ([] if self._drop_input_cols else inputs) + outputs)
1219
1244
 
1220
1245
  @property
1221
1246
  def model_signatures(self) -> Dict[str, ModelSignature]: