snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -196,7 +198,6 @@ class LedoitWolf(BaseTransformer):
196
198
  sample_weight_col: Optional[str] = None,
197
199
  ) -> None:
198
200
  super().__init__()
199
- self.id = str(uuid4()).replace("-", "_").upper()
200
201
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
201
202
 
202
203
  self._deps = list(deps)
@@ -218,6 +219,15 @@ class LedoitWolf(BaseTransformer):
218
219
  self.set_drop_input_cols(drop_input_cols)
219
220
  self.set_sample_weight_col(sample_weight_col)
220
221
 
222
+ def _get_rand_id(self) -> str:
223
+ """
224
+ Generate random id to be used in sproc and stage names.
225
+
226
+ Returns:
227
+ Random id string usable in sproc, table, and stage names.
228
+ """
229
+ return str(uuid4()).replace("-", "_").upper()
230
+
221
231
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
222
232
  """
223
233
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -296,7 +306,7 @@ class LedoitWolf(BaseTransformer):
296
306
  cp.dump(self._sklearn_object, local_transform_file)
297
307
 
298
308
  # Create temp stage to run fit.
299
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
309
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
300
310
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
301
311
  SqlResultValidator(
302
312
  session=session,
@@ -309,11 +319,12 @@ class LedoitWolf(BaseTransformer):
309
319
  expected_value=f"Stage area {transform_stage_name} successfully created."
310
320
  ).validate()
311
321
 
312
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
322
+ # Use posixpath to construct stage paths
323
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
324
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
313
325
  local_result_file_name = get_temp_file_path()
314
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
315
326
 
316
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
327
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
317
328
  statement_params = telemetry.get_function_usage_statement_params(
318
329
  project=_PROJECT,
319
330
  subproject=_SUBPROJECT,
@@ -339,6 +350,7 @@ class LedoitWolf(BaseTransformer):
339
350
  replace=True,
340
351
  session=session,
341
352
  statement_params=statement_params,
353
+ anonymous=True
342
354
  )
343
355
  def fit_wrapper_sproc(
344
356
  session: Session,
@@ -347,7 +359,8 @@ class LedoitWolf(BaseTransformer):
347
359
  stage_result_file_name: str,
348
360
  input_cols: List[str],
349
361
  label_cols: List[str],
350
- sample_weight_col: Optional[str]
362
+ sample_weight_col: Optional[str],
363
+ statement_params: Dict[str, str]
351
364
  ) -> str:
352
365
  import cloudpickle as cp
353
366
  import numpy as np
@@ -414,15 +427,15 @@ class LedoitWolf(BaseTransformer):
414
427
  api_calls=[Session.call],
415
428
  custom_tags=dict([("autogen", True)]),
416
429
  )
417
- sproc_export_file_name = session.call(
418
- fit_sproc_name,
430
+ sproc_export_file_name = fit_wrapper_sproc(
431
+ session,
419
432
  query,
420
433
  stage_transform_file_name,
421
434
  stage_result_file_name,
422
435
  identifier.get_unescaped_names(self.input_cols),
423
436
  identifier.get_unescaped_names(self.label_cols),
424
437
  identifier.get_unescaped_names(self.sample_weight_col),
425
- statement_params=statement_params,
438
+ statement_params,
426
439
  )
427
440
 
428
441
  if "|" in sproc_export_file_name:
@@ -432,7 +445,7 @@ class LedoitWolf(BaseTransformer):
432
445
  print("\n".join(fields[1:]))
433
446
 
434
447
  session.file.get(
435
- os.path.join(stage_result_file_name, sproc_export_file_name),
448
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
436
449
  local_result_file_name,
437
450
  statement_params=statement_params
438
451
  )
@@ -478,7 +491,7 @@ class LedoitWolf(BaseTransformer):
478
491
 
479
492
  # Register vectorized UDF for batch inference
480
493
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
481
- safe_id=self.id, method=inference_method)
494
+ safe_id=self._get_rand_id(), method=inference_method)
482
495
 
483
496
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
484
497
  # will try to pickle all of self which fails.
@@ -570,7 +583,7 @@ class LedoitWolf(BaseTransformer):
570
583
  return transformed_pandas_df.to_dict("records")
571
584
 
572
585
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
573
- safe_id=self.id
586
+ safe_id=self._get_rand_id()
574
587
  )
575
588
 
576
589
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -735,11 +748,18 @@ class LedoitWolf(BaseTransformer):
735
748
  Transformed dataset.
736
749
  """
737
750
  if isinstance(dataset, DataFrame):
751
+ expected_type_inferred = ""
752
+ # when it is classifier, infer the datatype from label columns
753
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
754
+ expected_type_inferred = convert_sp_to_sf_type(
755
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
756
+ )
757
+
738
758
  output_df = self._batch_inference(
739
759
  dataset=dataset,
740
760
  inference_method="predict",
741
761
  expected_output_cols_list=self.output_cols,
742
- expected_output_cols_type="",
762
+ expected_output_cols_type=expected_type_inferred,
743
763
  )
744
764
  elif isinstance(dataset, pd.DataFrame):
745
765
  output_df = self._sklearn_inference(
@@ -810,10 +830,10 @@ class LedoitWolf(BaseTransformer):
810
830
 
811
831
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
812
832
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
813
- Returns an empty list if current object is not a classifier or not yet fitted.
833
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
814
834
  """
815
835
  if getattr(self._sklearn_object, "classes_", None) is None:
816
- return []
836
+ return [output_cols_prefix]
817
837
 
818
838
  classes = self._sklearn_object.classes_
819
839
  if isinstance(classes, numpy.ndarray):
@@ -1038,7 +1058,7 @@ class LedoitWolf(BaseTransformer):
1038
1058
  cp.dump(self._sklearn_object, local_score_file)
1039
1059
 
1040
1060
  # Create temp stage to run score.
1041
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1061
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1042
1062
  session = dataset._session
1043
1063
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1044
1064
  SqlResultValidator(
@@ -1052,8 +1072,9 @@ class LedoitWolf(BaseTransformer):
1052
1072
  expected_value=f"Stage area {score_stage_name} successfully created."
1053
1073
  ).validate()
1054
1074
 
1055
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1056
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1075
+ # Use posixpath to construct stage paths
1076
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1077
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1057
1078
  statement_params = telemetry.get_function_usage_statement_params(
1058
1079
  project=_PROJECT,
1059
1080
  subproject=_SUBPROJECT,
@@ -1079,6 +1100,7 @@ class LedoitWolf(BaseTransformer):
1079
1100
  replace=True,
1080
1101
  session=session,
1081
1102
  statement_params=statement_params,
1103
+ anonymous=True
1082
1104
  )
1083
1105
  def score_wrapper_sproc(
1084
1106
  session: Session,
@@ -1086,7 +1108,8 @@ class LedoitWolf(BaseTransformer):
1086
1108
  stage_score_file_name: str,
1087
1109
  input_cols: List[str],
1088
1110
  label_cols: List[str],
1089
- sample_weight_col: Optional[str]
1111
+ sample_weight_col: Optional[str],
1112
+ statement_params: Dict[str, str]
1090
1113
  ) -> float:
1091
1114
  import cloudpickle as cp
1092
1115
  import numpy as np
@@ -1136,14 +1159,14 @@ class LedoitWolf(BaseTransformer):
1136
1159
  api_calls=[Session.call],
1137
1160
  custom_tags=dict([("autogen", True)]),
1138
1161
  )
1139
- score = session.call(
1140
- score_sproc_name,
1162
+ score = score_wrapper_sproc(
1163
+ session,
1141
1164
  query,
1142
1165
  stage_score_file_name,
1143
1166
  identifier.get_unescaped_names(self.input_cols),
1144
1167
  identifier.get_unescaped_names(self.label_cols),
1145
1168
  identifier.get_unescaped_names(self.sample_weight_col),
1146
- statement_params=statement_params,
1169
+ statement_params,
1147
1170
  )
1148
1171
 
1149
1172
  cleanup_temp_files([local_score_file_name])
@@ -1161,18 +1184,20 @@ class LedoitWolf(BaseTransformer):
1161
1184
  if self._sklearn_object._estimator_type == 'classifier':
1162
1185
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1163
1186
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1164
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1187
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1188
+ ([] if self._drop_input_cols else inputs) + outputs)
1165
1189
  # For regressor, the type of predict is float64
1166
1190
  elif self._sklearn_object._estimator_type == 'regressor':
1167
1191
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1168
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1169
-
1192
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1193
+ ([] if self._drop_input_cols else inputs) + outputs)
1170
1194
  for prob_func in PROB_FUNCTIONS:
1171
1195
  if hasattr(self, prob_func):
1172
1196
  output_cols_prefix: str = f"{prob_func}_"
1173
1197
  output_column_names = self._get_output_column_names(output_cols_prefix)
1174
1198
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1175
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1199
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1200
+ ([] if self._drop_input_cols else inputs) + outputs)
1176
1201
 
1177
1202
  @property
1178
1203
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -207,7 +209,6 @@ class MinCovDet(BaseTransformer):
207
209
  sample_weight_col: Optional[str] = None,
208
210
  ) -> None:
209
211
  super().__init__()
210
- self.id = str(uuid4()).replace("-", "_").upper()
211
212
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
212
213
 
213
214
  self._deps = list(deps)
@@ -230,6 +231,15 @@ class MinCovDet(BaseTransformer):
230
231
  self.set_drop_input_cols(drop_input_cols)
231
232
  self.set_sample_weight_col(sample_weight_col)
232
233
 
234
+ def _get_rand_id(self) -> str:
235
+ """
236
+ Generate random id to be used in sproc and stage names.
237
+
238
+ Returns:
239
+ Random id string usable in sproc, table, and stage names.
240
+ """
241
+ return str(uuid4()).replace("-", "_").upper()
242
+
233
243
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
234
244
  """
235
245
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -308,7 +318,7 @@ class MinCovDet(BaseTransformer):
308
318
  cp.dump(self._sklearn_object, local_transform_file)
309
319
 
310
320
  # Create temp stage to run fit.
311
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
321
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
312
322
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
313
323
  SqlResultValidator(
314
324
  session=session,
@@ -321,11 +331,12 @@ class MinCovDet(BaseTransformer):
321
331
  expected_value=f"Stage area {transform_stage_name} successfully created."
322
332
  ).validate()
323
333
 
324
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
334
+ # Use posixpath to construct stage paths
335
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
336
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
325
337
  local_result_file_name = get_temp_file_path()
326
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
327
338
 
328
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
339
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
329
340
  statement_params = telemetry.get_function_usage_statement_params(
330
341
  project=_PROJECT,
331
342
  subproject=_SUBPROJECT,
@@ -351,6 +362,7 @@ class MinCovDet(BaseTransformer):
351
362
  replace=True,
352
363
  session=session,
353
364
  statement_params=statement_params,
365
+ anonymous=True
354
366
  )
355
367
  def fit_wrapper_sproc(
356
368
  session: Session,
@@ -359,7 +371,8 @@ class MinCovDet(BaseTransformer):
359
371
  stage_result_file_name: str,
360
372
  input_cols: List[str],
361
373
  label_cols: List[str],
362
- sample_weight_col: Optional[str]
374
+ sample_weight_col: Optional[str],
375
+ statement_params: Dict[str, str]
363
376
  ) -> str:
364
377
  import cloudpickle as cp
365
378
  import numpy as np
@@ -426,15 +439,15 @@ class MinCovDet(BaseTransformer):
426
439
  api_calls=[Session.call],
427
440
  custom_tags=dict([("autogen", True)]),
428
441
  )
429
- sproc_export_file_name = session.call(
430
- fit_sproc_name,
442
+ sproc_export_file_name = fit_wrapper_sproc(
443
+ session,
431
444
  query,
432
445
  stage_transform_file_name,
433
446
  stage_result_file_name,
434
447
  identifier.get_unescaped_names(self.input_cols),
435
448
  identifier.get_unescaped_names(self.label_cols),
436
449
  identifier.get_unescaped_names(self.sample_weight_col),
437
- statement_params=statement_params,
450
+ statement_params,
438
451
  )
439
452
 
440
453
  if "|" in sproc_export_file_name:
@@ -444,7 +457,7 @@ class MinCovDet(BaseTransformer):
444
457
  print("\n".join(fields[1:]))
445
458
 
446
459
  session.file.get(
447
- os.path.join(stage_result_file_name, sproc_export_file_name),
460
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
448
461
  local_result_file_name,
449
462
  statement_params=statement_params
450
463
  )
@@ -490,7 +503,7 @@ class MinCovDet(BaseTransformer):
490
503
 
491
504
  # Register vectorized UDF for batch inference
492
505
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
493
- safe_id=self.id, method=inference_method)
506
+ safe_id=self._get_rand_id(), method=inference_method)
494
507
 
495
508
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
496
509
  # will try to pickle all of self which fails.
@@ -582,7 +595,7 @@ class MinCovDet(BaseTransformer):
582
595
  return transformed_pandas_df.to_dict("records")
583
596
 
584
597
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
585
- safe_id=self.id
598
+ safe_id=self._get_rand_id()
586
599
  )
587
600
 
588
601
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -747,11 +760,18 @@ class MinCovDet(BaseTransformer):
747
760
  Transformed dataset.
748
761
  """
749
762
  if isinstance(dataset, DataFrame):
763
+ expected_type_inferred = ""
764
+ # when it is classifier, infer the datatype from label columns
765
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
766
+ expected_type_inferred = convert_sp_to_sf_type(
767
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
768
+ )
769
+
750
770
  output_df = self._batch_inference(
751
771
  dataset=dataset,
752
772
  inference_method="predict",
753
773
  expected_output_cols_list=self.output_cols,
754
- expected_output_cols_type="",
774
+ expected_output_cols_type=expected_type_inferred,
755
775
  )
756
776
  elif isinstance(dataset, pd.DataFrame):
757
777
  output_df = self._sklearn_inference(
@@ -822,10 +842,10 @@ class MinCovDet(BaseTransformer):
822
842
 
823
843
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
824
844
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
825
- Returns an empty list if current object is not a classifier or not yet fitted.
845
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
826
846
  """
827
847
  if getattr(self._sklearn_object, "classes_", None) is None:
828
- return []
848
+ return [output_cols_prefix]
829
849
 
830
850
  classes = self._sklearn_object.classes_
831
851
  if isinstance(classes, numpy.ndarray):
@@ -1050,7 +1070,7 @@ class MinCovDet(BaseTransformer):
1050
1070
  cp.dump(self._sklearn_object, local_score_file)
1051
1071
 
1052
1072
  # Create temp stage to run score.
1053
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1073
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1054
1074
  session = dataset._session
1055
1075
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1056
1076
  SqlResultValidator(
@@ -1064,8 +1084,9 @@ class MinCovDet(BaseTransformer):
1064
1084
  expected_value=f"Stage area {score_stage_name} successfully created."
1065
1085
  ).validate()
1066
1086
 
1067
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1068
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1087
+ # Use posixpath to construct stage paths
1088
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1089
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1069
1090
  statement_params = telemetry.get_function_usage_statement_params(
1070
1091
  project=_PROJECT,
1071
1092
  subproject=_SUBPROJECT,
@@ -1091,6 +1112,7 @@ class MinCovDet(BaseTransformer):
1091
1112
  replace=True,
1092
1113
  session=session,
1093
1114
  statement_params=statement_params,
1115
+ anonymous=True
1094
1116
  )
1095
1117
  def score_wrapper_sproc(
1096
1118
  session: Session,
@@ -1098,7 +1120,8 @@ class MinCovDet(BaseTransformer):
1098
1120
  stage_score_file_name: str,
1099
1121
  input_cols: List[str],
1100
1122
  label_cols: List[str],
1101
- sample_weight_col: Optional[str]
1123
+ sample_weight_col: Optional[str],
1124
+ statement_params: Dict[str, str]
1102
1125
  ) -> float:
1103
1126
  import cloudpickle as cp
1104
1127
  import numpy as np
@@ -1148,14 +1171,14 @@ class MinCovDet(BaseTransformer):
1148
1171
  api_calls=[Session.call],
1149
1172
  custom_tags=dict([("autogen", True)]),
1150
1173
  )
1151
- score = session.call(
1152
- score_sproc_name,
1174
+ score = score_wrapper_sproc(
1175
+ session,
1153
1176
  query,
1154
1177
  stage_score_file_name,
1155
1178
  identifier.get_unescaped_names(self.input_cols),
1156
1179
  identifier.get_unescaped_names(self.label_cols),
1157
1180
  identifier.get_unescaped_names(self.sample_weight_col),
1158
- statement_params=statement_params,
1181
+ statement_params,
1159
1182
  )
1160
1183
 
1161
1184
  cleanup_temp_files([local_score_file_name])
@@ -1173,18 +1196,20 @@ class MinCovDet(BaseTransformer):
1173
1196
  if self._sklearn_object._estimator_type == 'classifier':
1174
1197
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1175
1198
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1176
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1199
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1200
+ ([] if self._drop_input_cols else inputs) + outputs)
1177
1201
  # For regressor, the type of predict is float64
1178
1202
  elif self._sklearn_object._estimator_type == 'regressor':
1179
1203
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1180
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1181
-
1204
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1205
+ ([] if self._drop_input_cols else inputs) + outputs)
1182
1206
  for prob_func in PROB_FUNCTIONS:
1183
1207
  if hasattr(self, prob_func):
1184
1208
  output_cols_prefix: str = f"{prob_func}_"
1185
1209
  output_column_names = self._get_output_column_names(output_cols_prefix)
1186
1210
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1187
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1211
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1212
+ ([] if self._drop_input_cols else inputs) + outputs)
1188
1213
 
1189
1214
  @property
1190
1215
  def model_signatures(self) -> Dict[str, ModelSignature]: