snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -211,7 +213,6 @@ class EllipticEnvelope(BaseTransformer):
211
213
  sample_weight_col: Optional[str] = None,
212
214
  ) -> None:
213
215
  super().__init__()
214
- self.id = str(uuid4()).replace("-", "_").upper()
215
216
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
216
217
 
217
218
  self._deps = list(deps)
@@ -235,6 +236,15 @@ class EllipticEnvelope(BaseTransformer):
235
236
  self.set_drop_input_cols(drop_input_cols)
236
237
  self.set_sample_weight_col(sample_weight_col)
237
238
 
239
+ def _get_rand_id(self) -> str:
240
+ """
241
+ Generate random id to be used in sproc and stage names.
242
+
243
+ Returns:
244
+ Random id string usable in sproc, table, and stage names.
245
+ """
246
+ return str(uuid4()).replace("-", "_").upper()
247
+
238
248
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
239
249
  """
240
250
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -313,7 +323,7 @@ class EllipticEnvelope(BaseTransformer):
313
323
  cp.dump(self._sklearn_object, local_transform_file)
314
324
 
315
325
  # Create temp stage to run fit.
316
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
326
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
317
327
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
318
328
  SqlResultValidator(
319
329
  session=session,
@@ -326,11 +336,12 @@ class EllipticEnvelope(BaseTransformer):
326
336
  expected_value=f"Stage area {transform_stage_name} successfully created."
327
337
  ).validate()
328
338
 
329
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
339
+ # Use posixpath to construct stage paths
340
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
341
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
330
342
  local_result_file_name = get_temp_file_path()
331
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
332
343
 
333
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
344
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
334
345
  statement_params = telemetry.get_function_usage_statement_params(
335
346
  project=_PROJECT,
336
347
  subproject=_SUBPROJECT,
@@ -356,6 +367,7 @@ class EllipticEnvelope(BaseTransformer):
356
367
  replace=True,
357
368
  session=session,
358
369
  statement_params=statement_params,
370
+ anonymous=True
359
371
  )
360
372
  def fit_wrapper_sproc(
361
373
  session: Session,
@@ -364,7 +376,8 @@ class EllipticEnvelope(BaseTransformer):
364
376
  stage_result_file_name: str,
365
377
  input_cols: List[str],
366
378
  label_cols: List[str],
367
- sample_weight_col: Optional[str]
379
+ sample_weight_col: Optional[str],
380
+ statement_params: Dict[str, str]
368
381
  ) -> str:
369
382
  import cloudpickle as cp
370
383
  import numpy as np
@@ -431,15 +444,15 @@ class EllipticEnvelope(BaseTransformer):
431
444
  api_calls=[Session.call],
432
445
  custom_tags=dict([("autogen", True)]),
433
446
  )
434
- sproc_export_file_name = session.call(
435
- fit_sproc_name,
447
+ sproc_export_file_name = fit_wrapper_sproc(
448
+ session,
436
449
  query,
437
450
  stage_transform_file_name,
438
451
  stage_result_file_name,
439
452
  identifier.get_unescaped_names(self.input_cols),
440
453
  identifier.get_unescaped_names(self.label_cols),
441
454
  identifier.get_unescaped_names(self.sample_weight_col),
442
- statement_params=statement_params,
455
+ statement_params,
443
456
  )
444
457
 
445
458
  if "|" in sproc_export_file_name:
@@ -449,7 +462,7 @@ class EllipticEnvelope(BaseTransformer):
449
462
  print("\n".join(fields[1:]))
450
463
 
451
464
  session.file.get(
452
- os.path.join(stage_result_file_name, sproc_export_file_name),
465
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
453
466
  local_result_file_name,
454
467
  statement_params=statement_params
455
468
  )
@@ -495,7 +508,7 @@ class EllipticEnvelope(BaseTransformer):
495
508
 
496
509
  # Register vectorized UDF for batch inference
497
510
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
498
- safe_id=self.id, method=inference_method)
511
+ safe_id=self._get_rand_id(), method=inference_method)
499
512
 
500
513
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
501
514
  # will try to pickle all of self which fails.
@@ -587,7 +600,7 @@ class EllipticEnvelope(BaseTransformer):
587
600
  return transformed_pandas_df.to_dict("records")
588
601
 
589
602
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
590
- safe_id=self.id
603
+ safe_id=self._get_rand_id()
591
604
  )
592
605
 
593
606
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -754,11 +767,18 @@ class EllipticEnvelope(BaseTransformer):
754
767
  Transformed dataset.
755
768
  """
756
769
  if isinstance(dataset, DataFrame):
770
+ expected_type_inferred = ""
771
+ # when it is classifier, infer the datatype from label columns
772
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
773
+ expected_type_inferred = convert_sp_to_sf_type(
774
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
775
+ )
776
+
757
777
  output_df = self._batch_inference(
758
778
  dataset=dataset,
759
779
  inference_method="predict",
760
780
  expected_output_cols_list=self.output_cols,
761
- expected_output_cols_type="",
781
+ expected_output_cols_type=expected_type_inferred,
762
782
  )
763
783
  elif isinstance(dataset, pd.DataFrame):
764
784
  output_df = self._sklearn_inference(
@@ -829,10 +849,10 @@ class EllipticEnvelope(BaseTransformer):
829
849
 
830
850
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
831
851
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
832
- Returns an empty list if current object is not a classifier or not yet fitted.
852
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
833
853
  """
834
854
  if getattr(self._sklearn_object, "classes_", None) is None:
835
- return []
855
+ return [output_cols_prefix]
836
856
 
837
857
  classes = self._sklearn_object.classes_
838
858
  if isinstance(classes, numpy.ndarray):
@@ -1059,7 +1079,7 @@ class EllipticEnvelope(BaseTransformer):
1059
1079
  cp.dump(self._sklearn_object, local_score_file)
1060
1080
 
1061
1081
  # Create temp stage to run score.
1062
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1082
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1063
1083
  session = dataset._session
1064
1084
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1065
1085
  SqlResultValidator(
@@ -1073,8 +1093,9 @@ class EllipticEnvelope(BaseTransformer):
1073
1093
  expected_value=f"Stage area {score_stage_name} successfully created."
1074
1094
  ).validate()
1075
1095
 
1076
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1077
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1096
+ # Use posixpath to construct stage paths
1097
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1098
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1078
1099
  statement_params = telemetry.get_function_usage_statement_params(
1079
1100
  project=_PROJECT,
1080
1101
  subproject=_SUBPROJECT,
@@ -1100,6 +1121,7 @@ class EllipticEnvelope(BaseTransformer):
1100
1121
  replace=True,
1101
1122
  session=session,
1102
1123
  statement_params=statement_params,
1124
+ anonymous=True
1103
1125
  )
1104
1126
  def score_wrapper_sproc(
1105
1127
  session: Session,
@@ -1107,7 +1129,8 @@ class EllipticEnvelope(BaseTransformer):
1107
1129
  stage_score_file_name: str,
1108
1130
  input_cols: List[str],
1109
1131
  label_cols: List[str],
1110
- sample_weight_col: Optional[str]
1132
+ sample_weight_col: Optional[str],
1133
+ statement_params: Dict[str, str]
1111
1134
  ) -> float:
1112
1135
  import cloudpickle as cp
1113
1136
  import numpy as np
@@ -1157,14 +1180,14 @@ class EllipticEnvelope(BaseTransformer):
1157
1180
  api_calls=[Session.call],
1158
1181
  custom_tags=dict([("autogen", True)]),
1159
1182
  )
1160
- score = session.call(
1161
- score_sproc_name,
1183
+ score = score_wrapper_sproc(
1184
+ session,
1162
1185
  query,
1163
1186
  stage_score_file_name,
1164
1187
  identifier.get_unescaped_names(self.input_cols),
1165
1188
  identifier.get_unescaped_names(self.label_cols),
1166
1189
  identifier.get_unescaped_names(self.sample_weight_col),
1167
- statement_params=statement_params,
1190
+ statement_params,
1168
1191
  )
1169
1192
 
1170
1193
  cleanup_temp_files([local_score_file_name])
@@ -1182,18 +1205,20 @@ class EllipticEnvelope(BaseTransformer):
1182
1205
  if self._sklearn_object._estimator_type == 'classifier':
1183
1206
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1184
1207
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1185
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1208
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1209
+ ([] if self._drop_input_cols else inputs) + outputs)
1186
1210
  # For regressor, the type of predict is float64
1187
1211
  elif self._sklearn_object._estimator_type == 'regressor':
1188
1212
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1189
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1190
-
1213
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1214
+ ([] if self._drop_input_cols else inputs) + outputs)
1191
1215
  for prob_func in PROB_FUNCTIONS:
1192
1216
  if hasattr(self, prob_func):
1193
1217
  output_cols_prefix: str = f"{prob_func}_"
1194
1218
  output_column_names = self._get_output_column_names(output_cols_prefix)
1195
1219
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1196
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1220
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1221
+ ([] if self._drop_input_cols else inputs) + outputs)
1197
1222
 
1198
1223
  @property
1199
1224
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -190,7 +192,6 @@ class EmpiricalCovariance(BaseTransformer):
190
192
  sample_weight_col: Optional[str] = None,
191
193
  ) -> None:
192
194
  super().__init__()
193
- self.id = str(uuid4()).replace("-", "_").upper()
194
195
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
195
196
 
196
197
  self._deps = list(deps)
@@ -211,6 +212,15 @@ class EmpiricalCovariance(BaseTransformer):
211
212
  self.set_drop_input_cols(drop_input_cols)
212
213
  self.set_sample_weight_col(sample_weight_col)
213
214
 
215
+ def _get_rand_id(self) -> str:
216
+ """
217
+ Generate random id to be used in sproc and stage names.
218
+
219
+ Returns:
220
+ Random id string usable in sproc, table, and stage names.
221
+ """
222
+ return str(uuid4()).replace("-", "_").upper()
223
+
214
224
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
215
225
  """
216
226
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -289,7 +299,7 @@ class EmpiricalCovariance(BaseTransformer):
289
299
  cp.dump(self._sklearn_object, local_transform_file)
290
300
 
291
301
  # Create temp stage to run fit.
292
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
302
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
293
303
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
294
304
  SqlResultValidator(
295
305
  session=session,
@@ -302,11 +312,12 @@ class EmpiricalCovariance(BaseTransformer):
302
312
  expected_value=f"Stage area {transform_stage_name} successfully created."
303
313
  ).validate()
304
314
 
305
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
315
+ # Use posixpath to construct stage paths
316
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
317
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
306
318
  local_result_file_name = get_temp_file_path()
307
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
308
319
 
309
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
320
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
310
321
  statement_params = telemetry.get_function_usage_statement_params(
311
322
  project=_PROJECT,
312
323
  subproject=_SUBPROJECT,
@@ -332,6 +343,7 @@ class EmpiricalCovariance(BaseTransformer):
332
343
  replace=True,
333
344
  session=session,
334
345
  statement_params=statement_params,
346
+ anonymous=True
335
347
  )
336
348
  def fit_wrapper_sproc(
337
349
  session: Session,
@@ -340,7 +352,8 @@ class EmpiricalCovariance(BaseTransformer):
340
352
  stage_result_file_name: str,
341
353
  input_cols: List[str],
342
354
  label_cols: List[str],
343
- sample_weight_col: Optional[str]
355
+ sample_weight_col: Optional[str],
356
+ statement_params: Dict[str, str]
344
357
  ) -> str:
345
358
  import cloudpickle as cp
346
359
  import numpy as np
@@ -407,15 +420,15 @@ class EmpiricalCovariance(BaseTransformer):
407
420
  api_calls=[Session.call],
408
421
  custom_tags=dict([("autogen", True)]),
409
422
  )
410
- sproc_export_file_name = session.call(
411
- fit_sproc_name,
423
+ sproc_export_file_name = fit_wrapper_sproc(
424
+ session,
412
425
  query,
413
426
  stage_transform_file_name,
414
427
  stage_result_file_name,
415
428
  identifier.get_unescaped_names(self.input_cols),
416
429
  identifier.get_unescaped_names(self.label_cols),
417
430
  identifier.get_unescaped_names(self.sample_weight_col),
418
- statement_params=statement_params,
431
+ statement_params,
419
432
  )
420
433
 
421
434
  if "|" in sproc_export_file_name:
@@ -425,7 +438,7 @@ class EmpiricalCovariance(BaseTransformer):
425
438
  print("\n".join(fields[1:]))
426
439
 
427
440
  session.file.get(
428
- os.path.join(stage_result_file_name, sproc_export_file_name),
441
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
429
442
  local_result_file_name,
430
443
  statement_params=statement_params
431
444
  )
@@ -471,7 +484,7 @@ class EmpiricalCovariance(BaseTransformer):
471
484
 
472
485
  # Register vectorized UDF for batch inference
473
486
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
474
- safe_id=self.id, method=inference_method)
487
+ safe_id=self._get_rand_id(), method=inference_method)
475
488
 
476
489
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
477
490
  # will try to pickle all of self which fails.
@@ -563,7 +576,7 @@ class EmpiricalCovariance(BaseTransformer):
563
576
  return transformed_pandas_df.to_dict("records")
564
577
 
565
578
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
566
- safe_id=self.id
579
+ safe_id=self._get_rand_id()
567
580
  )
568
581
 
569
582
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -728,11 +741,18 @@ class EmpiricalCovariance(BaseTransformer):
728
741
  Transformed dataset.
729
742
  """
730
743
  if isinstance(dataset, DataFrame):
744
+ expected_type_inferred = ""
745
+ # when it is classifier, infer the datatype from label columns
746
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
747
+ expected_type_inferred = convert_sp_to_sf_type(
748
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
749
+ )
750
+
731
751
  output_df = self._batch_inference(
732
752
  dataset=dataset,
733
753
  inference_method="predict",
734
754
  expected_output_cols_list=self.output_cols,
735
- expected_output_cols_type="",
755
+ expected_output_cols_type=expected_type_inferred,
736
756
  )
737
757
  elif isinstance(dataset, pd.DataFrame):
738
758
  output_df = self._sklearn_inference(
@@ -803,10 +823,10 @@ class EmpiricalCovariance(BaseTransformer):
803
823
 
804
824
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
805
825
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
806
- Returns an empty list if current object is not a classifier or not yet fitted.
826
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
807
827
  """
808
828
  if getattr(self._sklearn_object, "classes_", None) is None:
809
- return []
829
+ return [output_cols_prefix]
810
830
 
811
831
  classes = self._sklearn_object.classes_
812
832
  if isinstance(classes, numpy.ndarray):
@@ -1031,7 +1051,7 @@ class EmpiricalCovariance(BaseTransformer):
1031
1051
  cp.dump(self._sklearn_object, local_score_file)
1032
1052
 
1033
1053
  # Create temp stage to run score.
1034
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1054
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1035
1055
  session = dataset._session
1036
1056
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1037
1057
  SqlResultValidator(
@@ -1045,8 +1065,9 @@ class EmpiricalCovariance(BaseTransformer):
1045
1065
  expected_value=f"Stage area {score_stage_name} successfully created."
1046
1066
  ).validate()
1047
1067
 
1048
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1049
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1068
+ # Use posixpath to construct stage paths
1069
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1070
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1050
1071
  statement_params = telemetry.get_function_usage_statement_params(
1051
1072
  project=_PROJECT,
1052
1073
  subproject=_SUBPROJECT,
@@ -1072,6 +1093,7 @@ class EmpiricalCovariance(BaseTransformer):
1072
1093
  replace=True,
1073
1094
  session=session,
1074
1095
  statement_params=statement_params,
1096
+ anonymous=True
1075
1097
  )
1076
1098
  def score_wrapper_sproc(
1077
1099
  session: Session,
@@ -1079,7 +1101,8 @@ class EmpiricalCovariance(BaseTransformer):
1079
1101
  stage_score_file_name: str,
1080
1102
  input_cols: List[str],
1081
1103
  label_cols: List[str],
1082
- sample_weight_col: Optional[str]
1104
+ sample_weight_col: Optional[str],
1105
+ statement_params: Dict[str, str]
1083
1106
  ) -> float:
1084
1107
  import cloudpickle as cp
1085
1108
  import numpy as np
@@ -1129,14 +1152,14 @@ class EmpiricalCovariance(BaseTransformer):
1129
1152
  api_calls=[Session.call],
1130
1153
  custom_tags=dict([("autogen", True)]),
1131
1154
  )
1132
- score = session.call(
1133
- score_sproc_name,
1155
+ score = score_wrapper_sproc(
1156
+ session,
1134
1157
  query,
1135
1158
  stage_score_file_name,
1136
1159
  identifier.get_unescaped_names(self.input_cols),
1137
1160
  identifier.get_unescaped_names(self.label_cols),
1138
1161
  identifier.get_unescaped_names(self.sample_weight_col),
1139
- statement_params=statement_params,
1162
+ statement_params,
1140
1163
  )
1141
1164
 
1142
1165
  cleanup_temp_files([local_score_file_name])
@@ -1154,18 +1177,20 @@ class EmpiricalCovariance(BaseTransformer):
1154
1177
  if self._sklearn_object._estimator_type == 'classifier':
1155
1178
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1156
1179
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1157
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1180
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1181
+ ([] if self._drop_input_cols else inputs) + outputs)
1158
1182
  # For regressor, the type of predict is float64
1159
1183
  elif self._sklearn_object._estimator_type == 'regressor':
1160
1184
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1161
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1162
-
1185
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1186
+ ([] if self._drop_input_cols else inputs) + outputs)
1163
1187
  for prob_func in PROB_FUNCTIONS:
1164
1188
  if hasattr(self, prob_func):
1165
1189
  output_cols_prefix: str = f"{prob_func}_"
1166
1190
  output_column_names = self._get_output_column_names(output_cols_prefix)
1167
1191
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1168
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1192
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1193
+ ([] if self._drop_input_cols else inputs) + outputs)
1169
1194
 
1170
1195
  @property
1171
1196
  def model_signatures(self) -> Dict[str, ModelSignature]: