snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -255,7 +257,6 @@ class MultiTaskLassoCV(BaseTransformer):
255
257
  sample_weight_col: Optional[str] = None,
256
258
  ) -> None:
257
259
  super().__init__()
258
- self.id = str(uuid4()).replace("-", "_").upper()
259
260
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
260
261
 
261
262
  self._deps = list(deps)
@@ -286,6 +287,15 @@ class MultiTaskLassoCV(BaseTransformer):
286
287
  self.set_drop_input_cols(drop_input_cols)
287
288
  self.set_sample_weight_col(sample_weight_col)
288
289
 
290
+ def _get_rand_id(self) -> str:
291
+ """
292
+ Generate random id to be used in sproc and stage names.
293
+
294
+ Returns:
295
+ Random id string usable in sproc, table, and stage names.
296
+ """
297
+ return str(uuid4()).replace("-", "_").upper()
298
+
289
299
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
290
300
  """
291
301
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -364,7 +374,7 @@ class MultiTaskLassoCV(BaseTransformer):
364
374
  cp.dump(self._sklearn_object, local_transform_file)
365
375
 
366
376
  # Create temp stage to run fit.
367
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
377
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
368
378
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
369
379
  SqlResultValidator(
370
380
  session=session,
@@ -377,11 +387,12 @@ class MultiTaskLassoCV(BaseTransformer):
377
387
  expected_value=f"Stage area {transform_stage_name} successfully created."
378
388
  ).validate()
379
389
 
380
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
390
+ # Use posixpath to construct stage paths
391
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
392
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
381
393
  local_result_file_name = get_temp_file_path()
382
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
383
394
 
384
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
395
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
385
396
  statement_params = telemetry.get_function_usage_statement_params(
386
397
  project=_PROJECT,
387
398
  subproject=_SUBPROJECT,
@@ -407,6 +418,7 @@ class MultiTaskLassoCV(BaseTransformer):
407
418
  replace=True,
408
419
  session=session,
409
420
  statement_params=statement_params,
421
+ anonymous=True
410
422
  )
411
423
  def fit_wrapper_sproc(
412
424
  session: Session,
@@ -415,7 +427,8 @@ class MultiTaskLassoCV(BaseTransformer):
415
427
  stage_result_file_name: str,
416
428
  input_cols: List[str],
417
429
  label_cols: List[str],
418
- sample_weight_col: Optional[str]
430
+ sample_weight_col: Optional[str],
431
+ statement_params: Dict[str, str]
419
432
  ) -> str:
420
433
  import cloudpickle as cp
421
434
  import numpy as np
@@ -482,15 +495,15 @@ class MultiTaskLassoCV(BaseTransformer):
482
495
  api_calls=[Session.call],
483
496
  custom_tags=dict([("autogen", True)]),
484
497
  )
485
- sproc_export_file_name = session.call(
486
- fit_sproc_name,
498
+ sproc_export_file_name = fit_wrapper_sproc(
499
+ session,
487
500
  query,
488
501
  stage_transform_file_name,
489
502
  stage_result_file_name,
490
503
  identifier.get_unescaped_names(self.input_cols),
491
504
  identifier.get_unescaped_names(self.label_cols),
492
505
  identifier.get_unescaped_names(self.sample_weight_col),
493
- statement_params=statement_params,
506
+ statement_params,
494
507
  )
495
508
 
496
509
  if "|" in sproc_export_file_name:
@@ -500,7 +513,7 @@ class MultiTaskLassoCV(BaseTransformer):
500
513
  print("\n".join(fields[1:]))
501
514
 
502
515
  session.file.get(
503
- os.path.join(stage_result_file_name, sproc_export_file_name),
516
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
504
517
  local_result_file_name,
505
518
  statement_params=statement_params
506
519
  )
@@ -546,7 +559,7 @@ class MultiTaskLassoCV(BaseTransformer):
546
559
 
547
560
  # Register vectorized UDF for batch inference
548
561
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
549
- safe_id=self.id, method=inference_method)
562
+ safe_id=self._get_rand_id(), method=inference_method)
550
563
 
551
564
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
552
565
  # will try to pickle all of self which fails.
@@ -638,7 +651,7 @@ class MultiTaskLassoCV(BaseTransformer):
638
651
  return transformed_pandas_df.to_dict("records")
639
652
 
640
653
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
641
- safe_id=self.id
654
+ safe_id=self._get_rand_id()
642
655
  )
643
656
 
644
657
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -805,11 +818,18 @@ class MultiTaskLassoCV(BaseTransformer):
805
818
  Transformed dataset.
806
819
  """
807
820
  if isinstance(dataset, DataFrame):
821
+ expected_type_inferred = "float"
822
+ # when it is classifier, infer the datatype from label columns
823
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
824
+ expected_type_inferred = convert_sp_to_sf_type(
825
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
826
+ )
827
+
808
828
  output_df = self._batch_inference(
809
829
  dataset=dataset,
810
830
  inference_method="predict",
811
831
  expected_output_cols_list=self.output_cols,
812
- expected_output_cols_type="float",
832
+ expected_output_cols_type=expected_type_inferred,
813
833
  )
814
834
  elif isinstance(dataset, pd.DataFrame):
815
835
  output_df = self._sklearn_inference(
@@ -880,10 +900,10 @@ class MultiTaskLassoCV(BaseTransformer):
880
900
 
881
901
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
882
902
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
883
- Returns an empty list if current object is not a classifier or not yet fitted.
903
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
884
904
  """
885
905
  if getattr(self._sklearn_object, "classes_", None) is None:
886
- return []
906
+ return [output_cols_prefix]
887
907
 
888
908
  classes = self._sklearn_object.classes_
889
909
  if isinstance(classes, numpy.ndarray):
@@ -1108,7 +1128,7 @@ class MultiTaskLassoCV(BaseTransformer):
1108
1128
  cp.dump(self._sklearn_object, local_score_file)
1109
1129
 
1110
1130
  # Create temp stage to run score.
1111
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1131
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1112
1132
  session = dataset._session
1113
1133
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1114
1134
  SqlResultValidator(
@@ -1122,8 +1142,9 @@ class MultiTaskLassoCV(BaseTransformer):
1122
1142
  expected_value=f"Stage area {score_stage_name} successfully created."
1123
1143
  ).validate()
1124
1144
 
1125
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1126
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1145
+ # Use posixpath to construct stage paths
1146
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1147
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1127
1148
  statement_params = telemetry.get_function_usage_statement_params(
1128
1149
  project=_PROJECT,
1129
1150
  subproject=_SUBPROJECT,
@@ -1149,6 +1170,7 @@ class MultiTaskLassoCV(BaseTransformer):
1149
1170
  replace=True,
1150
1171
  session=session,
1151
1172
  statement_params=statement_params,
1173
+ anonymous=True
1152
1174
  )
1153
1175
  def score_wrapper_sproc(
1154
1176
  session: Session,
@@ -1156,7 +1178,8 @@ class MultiTaskLassoCV(BaseTransformer):
1156
1178
  stage_score_file_name: str,
1157
1179
  input_cols: List[str],
1158
1180
  label_cols: List[str],
1159
- sample_weight_col: Optional[str]
1181
+ sample_weight_col: Optional[str],
1182
+ statement_params: Dict[str, str]
1160
1183
  ) -> float:
1161
1184
  import cloudpickle as cp
1162
1185
  import numpy as np
@@ -1206,14 +1229,14 @@ class MultiTaskLassoCV(BaseTransformer):
1206
1229
  api_calls=[Session.call],
1207
1230
  custom_tags=dict([("autogen", True)]),
1208
1231
  )
1209
- score = session.call(
1210
- score_sproc_name,
1232
+ score = score_wrapper_sproc(
1233
+ session,
1211
1234
  query,
1212
1235
  stage_score_file_name,
1213
1236
  identifier.get_unescaped_names(self.input_cols),
1214
1237
  identifier.get_unescaped_names(self.label_cols),
1215
1238
  identifier.get_unescaped_names(self.sample_weight_col),
1216
- statement_params=statement_params,
1239
+ statement_params,
1217
1240
  )
1218
1241
 
1219
1242
  cleanup_temp_files([local_score_file_name])
@@ -1231,18 +1254,20 @@ class MultiTaskLassoCV(BaseTransformer):
1231
1254
  if self._sklearn_object._estimator_type == 'classifier':
1232
1255
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1233
1256
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1234
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1257
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1258
+ ([] if self._drop_input_cols else inputs) + outputs)
1235
1259
  # For regressor, the type of predict is float64
1236
1260
  elif self._sklearn_object._estimator_type == 'regressor':
1237
1261
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1238
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1239
-
1262
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1263
+ ([] if self._drop_input_cols else inputs) + outputs)
1240
1264
  for prob_func in PROB_FUNCTIONS:
1241
1265
  if hasattr(self, prob_func):
1242
1266
  output_cols_prefix: str = f"{prob_func}_"
1243
1267
  output_column_names = self._get_output_column_names(output_cols_prefix)
1244
1268
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1245
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1269
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1270
+ ([] if self._drop_input_cols else inputs) + outputs)
1246
1271
 
1247
1272
  @property
1248
1273
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -210,7 +212,6 @@ class OrthogonalMatchingPursuit(BaseTransformer):
210
212
  sample_weight_col: Optional[str] = None,
211
213
  ) -> None:
212
214
  super().__init__()
213
- self.id = str(uuid4()).replace("-", "_").upper()
214
215
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
215
216
 
216
217
  self._deps = list(deps)
@@ -234,6 +235,15 @@ class OrthogonalMatchingPursuit(BaseTransformer):
234
235
  self.set_drop_input_cols(drop_input_cols)
235
236
  self.set_sample_weight_col(sample_weight_col)
236
237
 
238
+ def _get_rand_id(self) -> str:
239
+ """
240
+ Generate random id to be used in sproc and stage names.
241
+
242
+ Returns:
243
+ Random id string usable in sproc, table, and stage names.
244
+ """
245
+ return str(uuid4()).replace("-", "_").upper()
246
+
237
247
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
238
248
  """
239
249
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -312,7 +322,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
312
322
  cp.dump(self._sklearn_object, local_transform_file)
313
323
 
314
324
  # Create temp stage to run fit.
315
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
325
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
316
326
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
317
327
  SqlResultValidator(
318
328
  session=session,
@@ -325,11 +335,12 @@ class OrthogonalMatchingPursuit(BaseTransformer):
325
335
  expected_value=f"Stage area {transform_stage_name} successfully created."
326
336
  ).validate()
327
337
 
328
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
338
+ # Use posixpath to construct stage paths
339
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
340
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
329
341
  local_result_file_name = get_temp_file_path()
330
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
331
342
 
332
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
343
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
333
344
  statement_params = telemetry.get_function_usage_statement_params(
334
345
  project=_PROJECT,
335
346
  subproject=_SUBPROJECT,
@@ -355,6 +366,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
355
366
  replace=True,
356
367
  session=session,
357
368
  statement_params=statement_params,
369
+ anonymous=True
358
370
  )
359
371
  def fit_wrapper_sproc(
360
372
  session: Session,
@@ -363,7 +375,8 @@ class OrthogonalMatchingPursuit(BaseTransformer):
363
375
  stage_result_file_name: str,
364
376
  input_cols: List[str],
365
377
  label_cols: List[str],
366
- sample_weight_col: Optional[str]
378
+ sample_weight_col: Optional[str],
379
+ statement_params: Dict[str, str]
367
380
  ) -> str:
368
381
  import cloudpickle as cp
369
382
  import numpy as np
@@ -430,15 +443,15 @@ class OrthogonalMatchingPursuit(BaseTransformer):
430
443
  api_calls=[Session.call],
431
444
  custom_tags=dict([("autogen", True)]),
432
445
  )
433
- sproc_export_file_name = session.call(
434
- fit_sproc_name,
446
+ sproc_export_file_name = fit_wrapper_sproc(
447
+ session,
435
448
  query,
436
449
  stage_transform_file_name,
437
450
  stage_result_file_name,
438
451
  identifier.get_unescaped_names(self.input_cols),
439
452
  identifier.get_unescaped_names(self.label_cols),
440
453
  identifier.get_unescaped_names(self.sample_weight_col),
441
- statement_params=statement_params,
454
+ statement_params,
442
455
  )
443
456
 
444
457
  if "|" in sproc_export_file_name:
@@ -448,7 +461,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
448
461
  print("\n".join(fields[1:]))
449
462
 
450
463
  session.file.get(
451
- os.path.join(stage_result_file_name, sproc_export_file_name),
464
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
452
465
  local_result_file_name,
453
466
  statement_params=statement_params
454
467
  )
@@ -494,7 +507,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
494
507
 
495
508
  # Register vectorized UDF for batch inference
496
509
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
497
- safe_id=self.id, method=inference_method)
510
+ safe_id=self._get_rand_id(), method=inference_method)
498
511
 
499
512
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
500
513
  # will try to pickle all of self which fails.
@@ -586,7 +599,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
586
599
  return transformed_pandas_df.to_dict("records")
587
600
 
588
601
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
589
- safe_id=self.id
602
+ safe_id=self._get_rand_id()
590
603
  )
591
604
 
592
605
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -753,11 +766,18 @@ class OrthogonalMatchingPursuit(BaseTransformer):
753
766
  Transformed dataset.
754
767
  """
755
768
  if isinstance(dataset, DataFrame):
769
+ expected_type_inferred = "float"
770
+ # when it is classifier, infer the datatype from label columns
771
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
772
+ expected_type_inferred = convert_sp_to_sf_type(
773
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
774
+ )
775
+
756
776
  output_df = self._batch_inference(
757
777
  dataset=dataset,
758
778
  inference_method="predict",
759
779
  expected_output_cols_list=self.output_cols,
760
- expected_output_cols_type="float",
780
+ expected_output_cols_type=expected_type_inferred,
761
781
  )
762
782
  elif isinstance(dataset, pd.DataFrame):
763
783
  output_df = self._sklearn_inference(
@@ -828,10 +848,10 @@ class OrthogonalMatchingPursuit(BaseTransformer):
828
848
 
829
849
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
830
850
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
831
- Returns an empty list if current object is not a classifier or not yet fitted.
851
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
832
852
  """
833
853
  if getattr(self._sklearn_object, "classes_", None) is None:
834
- return []
854
+ return [output_cols_prefix]
835
855
 
836
856
  classes = self._sklearn_object.classes_
837
857
  if isinstance(classes, numpy.ndarray):
@@ -1056,7 +1076,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
1056
1076
  cp.dump(self._sklearn_object, local_score_file)
1057
1077
 
1058
1078
  # Create temp stage to run score.
1059
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1079
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1060
1080
  session = dataset._session
1061
1081
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1062
1082
  SqlResultValidator(
@@ -1070,8 +1090,9 @@ class OrthogonalMatchingPursuit(BaseTransformer):
1070
1090
  expected_value=f"Stage area {score_stage_name} successfully created."
1071
1091
  ).validate()
1072
1092
 
1073
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1074
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1093
+ # Use posixpath to construct stage paths
1094
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1095
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1075
1096
  statement_params = telemetry.get_function_usage_statement_params(
1076
1097
  project=_PROJECT,
1077
1098
  subproject=_SUBPROJECT,
@@ -1097,6 +1118,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
1097
1118
  replace=True,
1098
1119
  session=session,
1099
1120
  statement_params=statement_params,
1121
+ anonymous=True
1100
1122
  )
1101
1123
  def score_wrapper_sproc(
1102
1124
  session: Session,
@@ -1104,7 +1126,8 @@ class OrthogonalMatchingPursuit(BaseTransformer):
1104
1126
  stage_score_file_name: str,
1105
1127
  input_cols: List[str],
1106
1128
  label_cols: List[str],
1107
- sample_weight_col: Optional[str]
1129
+ sample_weight_col: Optional[str],
1130
+ statement_params: Dict[str, str]
1108
1131
  ) -> float:
1109
1132
  import cloudpickle as cp
1110
1133
  import numpy as np
@@ -1154,14 +1177,14 @@ class OrthogonalMatchingPursuit(BaseTransformer):
1154
1177
  api_calls=[Session.call],
1155
1178
  custom_tags=dict([("autogen", True)]),
1156
1179
  )
1157
- score = session.call(
1158
- score_sproc_name,
1180
+ score = score_wrapper_sproc(
1181
+ session,
1159
1182
  query,
1160
1183
  stage_score_file_name,
1161
1184
  identifier.get_unescaped_names(self.input_cols),
1162
1185
  identifier.get_unescaped_names(self.label_cols),
1163
1186
  identifier.get_unescaped_names(self.sample_weight_col),
1164
- statement_params=statement_params,
1187
+ statement_params,
1165
1188
  )
1166
1189
 
1167
1190
  cleanup_temp_files([local_score_file_name])
@@ -1179,18 +1202,20 @@ class OrthogonalMatchingPursuit(BaseTransformer):
1179
1202
  if self._sklearn_object._estimator_type == 'classifier':
1180
1203
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1181
1204
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1182
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1205
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1206
+ ([] if self._drop_input_cols else inputs) + outputs)
1183
1207
  # For regressor, the type of predict is float64
1184
1208
  elif self._sklearn_object._estimator_type == 'regressor':
1185
1209
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1186
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1187
-
1210
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1211
+ ([] if self._drop_input_cols else inputs) + outputs)
1188
1212
  for prob_func in PROB_FUNCTIONS:
1189
1213
  if hasattr(self, prob_func):
1190
1214
  output_cols_prefix: str = f"{prob_func}_"
1191
1215
  output_column_names = self._get_output_column_names(output_cols_prefix)
1192
1216
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1193
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1217
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1218
+ ([] if self._drop_input_cols else inputs) + outputs)
1194
1219
 
1195
1220
  @property
1196
1221
  def model_signatures(self) -> Dict[str, ModelSignature]: