snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -28,6 +29,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
28
29
  from snowflake.snowpark import DataFrame, Session
29
30
  from snowflake.snowpark.functions import pandas_udf, sproc
30
31
  from snowflake.snowpark.types import PandasSeries
32
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
31
33
 
32
34
  from snowflake.ml.model.model_signature import (
33
35
  DataType,
@@ -194,7 +196,6 @@ class GenericUnivariateSelect(BaseTransformer):
194
196
  sample_weight_col: Optional[str] = None,
195
197
  ) -> None:
196
198
  super().__init__()
197
- self.id = str(uuid4()).replace("-", "_").upper()
198
199
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
199
200
 
200
201
  self._deps = list(deps)
@@ -216,6 +217,15 @@ class GenericUnivariateSelect(BaseTransformer):
216
217
  self.set_drop_input_cols(drop_input_cols)
217
218
  self.set_sample_weight_col(sample_weight_col)
218
219
 
220
+ def _get_rand_id(self) -> str:
221
+ """
222
+ Generate random id to be used in sproc and stage names.
223
+
224
+ Returns:
225
+ Random id string usable in sproc, table, and stage names.
226
+ """
227
+ return str(uuid4()).replace("-", "_").upper()
228
+
219
229
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
220
230
  """
221
231
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -294,7 +304,7 @@ class GenericUnivariateSelect(BaseTransformer):
294
304
  cp.dump(self._sklearn_object, local_transform_file)
295
305
 
296
306
  # Create temp stage to run fit.
297
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
307
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
298
308
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
299
309
  SqlResultValidator(
300
310
  session=session,
@@ -307,11 +317,12 @@ class GenericUnivariateSelect(BaseTransformer):
307
317
  expected_value=f"Stage area {transform_stage_name} successfully created."
308
318
  ).validate()
309
319
 
310
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
320
+ # Use posixpath to construct stage paths
321
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
322
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
311
323
  local_result_file_name = get_temp_file_path()
312
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
313
324
 
314
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
325
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
315
326
  statement_params = telemetry.get_function_usage_statement_params(
316
327
  project=_PROJECT,
317
328
  subproject=_SUBPROJECT,
@@ -337,6 +348,7 @@ class GenericUnivariateSelect(BaseTransformer):
337
348
  replace=True,
338
349
  session=session,
339
350
  statement_params=statement_params,
351
+ anonymous=True
340
352
  )
341
353
  def fit_wrapper_sproc(
342
354
  session: Session,
@@ -345,7 +357,8 @@ class GenericUnivariateSelect(BaseTransformer):
345
357
  stage_result_file_name: str,
346
358
  input_cols: List[str],
347
359
  label_cols: List[str],
348
- sample_weight_col: Optional[str]
360
+ sample_weight_col: Optional[str],
361
+ statement_params: Dict[str, str]
349
362
  ) -> str:
350
363
  import cloudpickle as cp
351
364
  import numpy as np
@@ -412,15 +425,15 @@ class GenericUnivariateSelect(BaseTransformer):
412
425
  api_calls=[Session.call],
413
426
  custom_tags=dict([("autogen", True)]),
414
427
  )
415
- sproc_export_file_name = session.call(
416
- fit_sproc_name,
428
+ sproc_export_file_name = fit_wrapper_sproc(
429
+ session,
417
430
  query,
418
431
  stage_transform_file_name,
419
432
  stage_result_file_name,
420
433
  identifier.get_unescaped_names(self.input_cols),
421
434
  identifier.get_unescaped_names(self.label_cols),
422
435
  identifier.get_unescaped_names(self.sample_weight_col),
423
- statement_params=statement_params,
436
+ statement_params,
424
437
  )
425
438
 
426
439
  if "|" in sproc_export_file_name:
@@ -430,7 +443,7 @@ class GenericUnivariateSelect(BaseTransformer):
430
443
  print("\n".join(fields[1:]))
431
444
 
432
445
  session.file.get(
433
- os.path.join(stage_result_file_name, sproc_export_file_name),
446
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
434
447
  local_result_file_name,
435
448
  statement_params=statement_params
436
449
  )
@@ -476,7 +489,7 @@ class GenericUnivariateSelect(BaseTransformer):
476
489
 
477
490
  # Register vectorized UDF for batch inference
478
491
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
479
- safe_id=self.id, method=inference_method)
492
+ safe_id=self._get_rand_id(), method=inference_method)
480
493
 
481
494
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
482
495
  # will try to pickle all of self which fails.
@@ -568,7 +581,7 @@ class GenericUnivariateSelect(BaseTransformer):
568
581
  return transformed_pandas_df.to_dict("records")
569
582
 
570
583
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
571
- safe_id=self.id
584
+ safe_id=self._get_rand_id()
572
585
  )
573
586
 
574
587
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -733,11 +746,18 @@ class GenericUnivariateSelect(BaseTransformer):
733
746
  Transformed dataset.
734
747
  """
735
748
  if isinstance(dataset, DataFrame):
749
+ expected_type_inferred = ""
750
+ # when it is classifier, infer the datatype from label columns
751
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
752
+ expected_type_inferred = convert_sp_to_sf_type(
753
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
754
+ )
755
+
736
756
  output_df = self._batch_inference(
737
757
  dataset=dataset,
738
758
  inference_method="predict",
739
759
  expected_output_cols_list=self.output_cols,
740
- expected_output_cols_type="",
760
+ expected_output_cols_type=expected_type_inferred,
741
761
  )
742
762
  elif isinstance(dataset, pd.DataFrame):
743
763
  output_df = self._sklearn_inference(
@@ -810,10 +830,10 @@ class GenericUnivariateSelect(BaseTransformer):
810
830
 
811
831
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
812
832
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
813
- Returns an empty list if current object is not a classifier or not yet fitted.
833
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
814
834
  """
815
835
  if getattr(self._sklearn_object, "classes_", None) is None:
816
- return []
836
+ return [output_cols_prefix]
817
837
 
818
838
  classes = self._sklearn_object.classes_
819
839
  if isinstance(classes, numpy.ndarray):
@@ -1038,7 +1058,7 @@ class GenericUnivariateSelect(BaseTransformer):
1038
1058
  cp.dump(self._sklearn_object, local_score_file)
1039
1059
 
1040
1060
  # Create temp stage to run score.
1041
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1061
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1042
1062
  session = dataset._session
1043
1063
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1044
1064
  SqlResultValidator(
@@ -1052,8 +1072,9 @@ class GenericUnivariateSelect(BaseTransformer):
1052
1072
  expected_value=f"Stage area {score_stage_name} successfully created."
1053
1073
  ).validate()
1054
1074
 
1055
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1056
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1075
+ # Use posixpath to construct stage paths
1076
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1077
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1057
1078
  statement_params = telemetry.get_function_usage_statement_params(
1058
1079
  project=_PROJECT,
1059
1080
  subproject=_SUBPROJECT,
@@ -1079,6 +1100,7 @@ class GenericUnivariateSelect(BaseTransformer):
1079
1100
  replace=True,
1080
1101
  session=session,
1081
1102
  statement_params=statement_params,
1103
+ anonymous=True
1082
1104
  )
1083
1105
  def score_wrapper_sproc(
1084
1106
  session: Session,
@@ -1086,7 +1108,8 @@ class GenericUnivariateSelect(BaseTransformer):
1086
1108
  stage_score_file_name: str,
1087
1109
  input_cols: List[str],
1088
1110
  label_cols: List[str],
1089
- sample_weight_col: Optional[str]
1111
+ sample_weight_col: Optional[str],
1112
+ statement_params: Dict[str, str]
1090
1113
  ) -> float:
1091
1114
  import cloudpickle as cp
1092
1115
  import numpy as np
@@ -1136,14 +1159,14 @@ class GenericUnivariateSelect(BaseTransformer):
1136
1159
  api_calls=[Session.call],
1137
1160
  custom_tags=dict([("autogen", True)]),
1138
1161
  )
1139
- score = session.call(
1140
- score_sproc_name,
1162
+ score = score_wrapper_sproc(
1163
+ session,
1141
1164
  query,
1142
1165
  stage_score_file_name,
1143
1166
  identifier.get_unescaped_names(self.input_cols),
1144
1167
  identifier.get_unescaped_names(self.label_cols),
1145
1168
  identifier.get_unescaped_names(self.sample_weight_col),
1146
- statement_params=statement_params,
1169
+ statement_params,
1147
1170
  )
1148
1171
 
1149
1172
  cleanup_temp_files([local_score_file_name])
@@ -1161,18 +1184,20 @@ class GenericUnivariateSelect(BaseTransformer):
1161
1184
  if self._sklearn_object._estimator_type == 'classifier':
1162
1185
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1163
1186
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1164
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1187
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1188
+ ([] if self._drop_input_cols else inputs) + outputs)
1165
1189
  # For regressor, the type of predict is float64
1166
1190
  elif self._sklearn_object._estimator_type == 'regressor':
1167
1191
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1168
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1169
-
1192
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1193
+ ([] if self._drop_input_cols else inputs) + outputs)
1170
1194
  for prob_func in PROB_FUNCTIONS:
1171
1195
  if hasattr(self, prob_func):
1172
1196
  output_cols_prefix: str = f"{prob_func}_"
1173
1197
  output_column_names = self._get_output_column_names(output_cols_prefix)
1174
1198
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1175
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1199
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1200
+ ([] if self._drop_input_cols else inputs) + outputs)
1176
1201
 
1177
1202
  @property
1178
1203
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -28,6 +29,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
28
29
  from snowflake.snowpark import DataFrame, Session
29
30
  from snowflake.snowpark.functions import pandas_udf, sproc
30
31
  from snowflake.snowpark.types import PandasSeries
32
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
31
33
 
32
34
  from snowflake.ml.model.model_signature import (
33
35
  DataType,
@@ -191,7 +193,6 @@ class SelectFdr(BaseTransformer):
191
193
  sample_weight_col: Optional[str] = None,
192
194
  ) -> None:
193
195
  super().__init__()
194
- self.id = str(uuid4()).replace("-", "_").upper()
195
196
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
196
197
 
197
198
  self._deps = list(deps)
@@ -212,6 +213,15 @@ class SelectFdr(BaseTransformer):
212
213
  self.set_drop_input_cols(drop_input_cols)
213
214
  self.set_sample_weight_col(sample_weight_col)
214
215
 
216
+ def _get_rand_id(self) -> str:
217
+ """
218
+ Generate random id to be used in sproc and stage names.
219
+
220
+ Returns:
221
+ Random id string usable in sproc, table, and stage names.
222
+ """
223
+ return str(uuid4()).replace("-", "_").upper()
224
+
215
225
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
216
226
  """
217
227
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -290,7 +300,7 @@ class SelectFdr(BaseTransformer):
290
300
  cp.dump(self._sklearn_object, local_transform_file)
291
301
 
292
302
  # Create temp stage to run fit.
293
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
303
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
294
304
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
295
305
  SqlResultValidator(
296
306
  session=session,
@@ -303,11 +313,12 @@ class SelectFdr(BaseTransformer):
303
313
  expected_value=f"Stage area {transform_stage_name} successfully created."
304
314
  ).validate()
305
315
 
306
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
316
+ # Use posixpath to construct stage paths
317
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
318
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
307
319
  local_result_file_name = get_temp_file_path()
308
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
309
320
 
310
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
321
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
311
322
  statement_params = telemetry.get_function_usage_statement_params(
312
323
  project=_PROJECT,
313
324
  subproject=_SUBPROJECT,
@@ -333,6 +344,7 @@ class SelectFdr(BaseTransformer):
333
344
  replace=True,
334
345
  session=session,
335
346
  statement_params=statement_params,
347
+ anonymous=True
336
348
  )
337
349
  def fit_wrapper_sproc(
338
350
  session: Session,
@@ -341,7 +353,8 @@ class SelectFdr(BaseTransformer):
341
353
  stage_result_file_name: str,
342
354
  input_cols: List[str],
343
355
  label_cols: List[str],
344
- sample_weight_col: Optional[str]
356
+ sample_weight_col: Optional[str],
357
+ statement_params: Dict[str, str]
345
358
  ) -> str:
346
359
  import cloudpickle as cp
347
360
  import numpy as np
@@ -408,15 +421,15 @@ class SelectFdr(BaseTransformer):
408
421
  api_calls=[Session.call],
409
422
  custom_tags=dict([("autogen", True)]),
410
423
  )
411
- sproc_export_file_name = session.call(
412
- fit_sproc_name,
424
+ sproc_export_file_name = fit_wrapper_sproc(
425
+ session,
413
426
  query,
414
427
  stage_transform_file_name,
415
428
  stage_result_file_name,
416
429
  identifier.get_unescaped_names(self.input_cols),
417
430
  identifier.get_unescaped_names(self.label_cols),
418
431
  identifier.get_unescaped_names(self.sample_weight_col),
419
- statement_params=statement_params,
432
+ statement_params,
420
433
  )
421
434
 
422
435
  if "|" in sproc_export_file_name:
@@ -426,7 +439,7 @@ class SelectFdr(BaseTransformer):
426
439
  print("\n".join(fields[1:]))
427
440
 
428
441
  session.file.get(
429
- os.path.join(stage_result_file_name, sproc_export_file_name),
442
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
430
443
  local_result_file_name,
431
444
  statement_params=statement_params
432
445
  )
@@ -472,7 +485,7 @@ class SelectFdr(BaseTransformer):
472
485
 
473
486
  # Register vectorized UDF for batch inference
474
487
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
475
- safe_id=self.id, method=inference_method)
488
+ safe_id=self._get_rand_id(), method=inference_method)
476
489
 
477
490
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
478
491
  # will try to pickle all of self which fails.
@@ -564,7 +577,7 @@ class SelectFdr(BaseTransformer):
564
577
  return transformed_pandas_df.to_dict("records")
565
578
 
566
579
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
567
- safe_id=self.id
580
+ safe_id=self._get_rand_id()
568
581
  )
569
582
 
570
583
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -729,11 +742,18 @@ class SelectFdr(BaseTransformer):
729
742
  Transformed dataset.
730
743
  """
731
744
  if isinstance(dataset, DataFrame):
745
+ expected_type_inferred = ""
746
+ # when it is classifier, infer the datatype from label columns
747
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
748
+ expected_type_inferred = convert_sp_to_sf_type(
749
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
750
+ )
751
+
732
752
  output_df = self._batch_inference(
733
753
  dataset=dataset,
734
754
  inference_method="predict",
735
755
  expected_output_cols_list=self.output_cols,
736
- expected_output_cols_type="",
756
+ expected_output_cols_type=expected_type_inferred,
737
757
  )
738
758
  elif isinstance(dataset, pd.DataFrame):
739
759
  output_df = self._sklearn_inference(
@@ -806,10 +826,10 @@ class SelectFdr(BaseTransformer):
806
826
 
807
827
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
808
828
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
809
- Returns an empty list if current object is not a classifier or not yet fitted.
829
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
810
830
  """
811
831
  if getattr(self._sklearn_object, "classes_", None) is None:
812
- return []
832
+ return [output_cols_prefix]
813
833
 
814
834
  classes = self._sklearn_object.classes_
815
835
  if isinstance(classes, numpy.ndarray):
@@ -1034,7 +1054,7 @@ class SelectFdr(BaseTransformer):
1034
1054
  cp.dump(self._sklearn_object, local_score_file)
1035
1055
 
1036
1056
  # Create temp stage to run score.
1037
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1057
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1038
1058
  session = dataset._session
1039
1059
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1040
1060
  SqlResultValidator(
@@ -1048,8 +1068,9 @@ class SelectFdr(BaseTransformer):
1048
1068
  expected_value=f"Stage area {score_stage_name} successfully created."
1049
1069
  ).validate()
1050
1070
 
1051
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1052
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1071
+ # Use posixpath to construct stage paths
1072
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1073
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1053
1074
  statement_params = telemetry.get_function_usage_statement_params(
1054
1075
  project=_PROJECT,
1055
1076
  subproject=_SUBPROJECT,
@@ -1075,6 +1096,7 @@ class SelectFdr(BaseTransformer):
1075
1096
  replace=True,
1076
1097
  session=session,
1077
1098
  statement_params=statement_params,
1099
+ anonymous=True
1078
1100
  )
1079
1101
  def score_wrapper_sproc(
1080
1102
  session: Session,
@@ -1082,7 +1104,8 @@ class SelectFdr(BaseTransformer):
1082
1104
  stage_score_file_name: str,
1083
1105
  input_cols: List[str],
1084
1106
  label_cols: List[str],
1085
- sample_weight_col: Optional[str]
1107
+ sample_weight_col: Optional[str],
1108
+ statement_params: Dict[str, str]
1086
1109
  ) -> float:
1087
1110
  import cloudpickle as cp
1088
1111
  import numpy as np
@@ -1132,14 +1155,14 @@ class SelectFdr(BaseTransformer):
1132
1155
  api_calls=[Session.call],
1133
1156
  custom_tags=dict([("autogen", True)]),
1134
1157
  )
1135
- score = session.call(
1136
- score_sproc_name,
1158
+ score = score_wrapper_sproc(
1159
+ session,
1137
1160
  query,
1138
1161
  stage_score_file_name,
1139
1162
  identifier.get_unescaped_names(self.input_cols),
1140
1163
  identifier.get_unescaped_names(self.label_cols),
1141
1164
  identifier.get_unescaped_names(self.sample_weight_col),
1142
- statement_params=statement_params,
1165
+ statement_params,
1143
1166
  )
1144
1167
 
1145
1168
  cleanup_temp_files([local_score_file_name])
@@ -1157,18 +1180,20 @@ class SelectFdr(BaseTransformer):
1157
1180
  if self._sklearn_object._estimator_type == 'classifier':
1158
1181
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1159
1182
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1160
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1183
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1184
+ ([] if self._drop_input_cols else inputs) + outputs)
1161
1185
  # For regressor, the type of predict is float64
1162
1186
  elif self._sklearn_object._estimator_type == 'regressor':
1163
1187
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1164
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1165
-
1188
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1189
+ ([] if self._drop_input_cols else inputs) + outputs)
1166
1190
  for prob_func in PROB_FUNCTIONS:
1167
1191
  if hasattr(self, prob_func):
1168
1192
  output_cols_prefix: str = f"{prob_func}_"
1169
1193
  output_column_names = self._get_output_column_names(output_cols_prefix)
1170
1194
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1171
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1195
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1196
+ ([] if self._drop_input_cols else inputs) + outputs)
1172
1197
 
1173
1198
  @property
1174
1199
  def model_signatures(self) -> Dict[str, ModelSignature]: