snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (189):
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -219,7 +221,6 @@ class VotingClassifier(BaseTransformer):
219
221
  sample_weight_col: Optional[str] = None,
220
222
  ) -> None:
221
223
  super().__init__()
222
- self.id = str(uuid4()).replace("-", "_").upper()
223
224
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
224
225
  deps = deps | _gather_dependencies(estimators)
225
226
  self._deps = list(deps)
@@ -244,6 +245,15 @@ class VotingClassifier(BaseTransformer):
244
245
  self.set_drop_input_cols(drop_input_cols)
245
246
  self.set_sample_weight_col(sample_weight_col)
246
247
 
248
+ def _get_rand_id(self) -> str:
249
+ """
250
+ Generate random id to be used in sproc and stage names.
251
+
252
+ Returns:
253
+ Random id string usable in sproc, table, and stage names.
254
+ """
255
+ return str(uuid4()).replace("-", "_").upper()
256
+
247
257
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
248
258
  """
249
259
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -322,7 +332,7 @@ class VotingClassifier(BaseTransformer):
322
332
  cp.dump(self._sklearn_object, local_transform_file)
323
333
 
324
334
  # Create temp stage to run fit.
325
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
335
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
326
336
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
327
337
  SqlResultValidator(
328
338
  session=session,
@@ -335,11 +345,12 @@ class VotingClassifier(BaseTransformer):
335
345
  expected_value=f"Stage area {transform_stage_name} successfully created."
336
346
  ).validate()
337
347
 
338
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
348
+ # Use posixpath to construct stage paths
349
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
350
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
339
351
  local_result_file_name = get_temp_file_path()
340
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
341
352
 
342
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
353
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
343
354
  statement_params = telemetry.get_function_usage_statement_params(
344
355
  project=_PROJECT,
345
356
  subproject=_SUBPROJECT,
@@ -365,6 +376,7 @@ class VotingClassifier(BaseTransformer):
365
376
  replace=True,
366
377
  session=session,
367
378
  statement_params=statement_params,
379
+ anonymous=True
368
380
  )
369
381
  def fit_wrapper_sproc(
370
382
  session: Session,
@@ -373,7 +385,8 @@ class VotingClassifier(BaseTransformer):
373
385
  stage_result_file_name: str,
374
386
  input_cols: List[str],
375
387
  label_cols: List[str],
376
- sample_weight_col: Optional[str]
388
+ sample_weight_col: Optional[str],
389
+ statement_params: Dict[str, str]
377
390
  ) -> str:
378
391
  import cloudpickle as cp
379
392
  import numpy as np
@@ -440,15 +453,15 @@ class VotingClassifier(BaseTransformer):
440
453
  api_calls=[Session.call],
441
454
  custom_tags=dict([("autogen", True)]),
442
455
  )
443
- sproc_export_file_name = session.call(
444
- fit_sproc_name,
456
+ sproc_export_file_name = fit_wrapper_sproc(
457
+ session,
445
458
  query,
446
459
  stage_transform_file_name,
447
460
  stage_result_file_name,
448
461
  identifier.get_unescaped_names(self.input_cols),
449
462
  identifier.get_unescaped_names(self.label_cols),
450
463
  identifier.get_unescaped_names(self.sample_weight_col),
451
- statement_params=statement_params,
464
+ statement_params,
452
465
  )
453
466
 
454
467
  if "|" in sproc_export_file_name:
@@ -458,7 +471,7 @@ class VotingClassifier(BaseTransformer):
458
471
  print("\n".join(fields[1:]))
459
472
 
460
473
  session.file.get(
461
- os.path.join(stage_result_file_name, sproc_export_file_name),
474
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
462
475
  local_result_file_name,
463
476
  statement_params=statement_params
464
477
  )
@@ -504,7 +517,7 @@ class VotingClassifier(BaseTransformer):
504
517
 
505
518
  # Register vectorized UDF for batch inference
506
519
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
507
- safe_id=self.id, method=inference_method)
520
+ safe_id=self._get_rand_id(), method=inference_method)
508
521
 
509
522
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
510
523
  # will try to pickle all of self which fails.
@@ -596,7 +609,7 @@ class VotingClassifier(BaseTransformer):
596
609
  return transformed_pandas_df.to_dict("records")
597
610
 
598
611
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
599
- safe_id=self.id
612
+ safe_id=self._get_rand_id()
600
613
  )
601
614
 
602
615
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -763,11 +776,18 @@ class VotingClassifier(BaseTransformer):
763
776
  Transformed dataset.
764
777
  """
765
778
  if isinstance(dataset, DataFrame):
779
+ expected_type_inferred = ""
780
+ # when it is classifier, infer the datatype from label columns
781
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
782
+ expected_type_inferred = convert_sp_to_sf_type(
783
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
784
+ )
785
+
766
786
  output_df = self._batch_inference(
767
787
  dataset=dataset,
768
788
  inference_method="predict",
769
789
  expected_output_cols_list=self.output_cols,
770
- expected_output_cols_type="",
790
+ expected_output_cols_type=expected_type_inferred,
771
791
  )
772
792
  elif isinstance(dataset, pd.DataFrame):
773
793
  output_df = self._sklearn_inference(
@@ -840,10 +860,10 @@ class VotingClassifier(BaseTransformer):
840
860
 
841
861
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
842
862
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
843
- Returns an empty list if current object is not a classifier or not yet fitted.
863
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
844
864
  """
845
865
  if getattr(self._sklearn_object, "classes_", None) is None:
846
- return []
866
+ return [output_cols_prefix]
847
867
 
848
868
  classes = self._sklearn_object.classes_
849
869
  if isinstance(classes, numpy.ndarray):
@@ -1072,7 +1092,7 @@ class VotingClassifier(BaseTransformer):
1072
1092
  cp.dump(self._sklearn_object, local_score_file)
1073
1093
 
1074
1094
  # Create temp stage to run score.
1075
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1095
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1076
1096
  session = dataset._session
1077
1097
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1078
1098
  SqlResultValidator(
@@ -1086,8 +1106,9 @@ class VotingClassifier(BaseTransformer):
1086
1106
  expected_value=f"Stage area {score_stage_name} successfully created."
1087
1107
  ).validate()
1088
1108
 
1089
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1090
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1109
+ # Use posixpath to construct stage paths
1110
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1111
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1091
1112
  statement_params = telemetry.get_function_usage_statement_params(
1092
1113
  project=_PROJECT,
1093
1114
  subproject=_SUBPROJECT,
@@ -1113,6 +1134,7 @@ class VotingClassifier(BaseTransformer):
1113
1134
  replace=True,
1114
1135
  session=session,
1115
1136
  statement_params=statement_params,
1137
+ anonymous=True
1116
1138
  )
1117
1139
  def score_wrapper_sproc(
1118
1140
  session: Session,
@@ -1120,7 +1142,8 @@ class VotingClassifier(BaseTransformer):
1120
1142
  stage_score_file_name: str,
1121
1143
  input_cols: List[str],
1122
1144
  label_cols: List[str],
1123
- sample_weight_col: Optional[str]
1145
+ sample_weight_col: Optional[str],
1146
+ statement_params: Dict[str, str]
1124
1147
  ) -> float:
1125
1148
  import cloudpickle as cp
1126
1149
  import numpy as np
@@ -1170,14 +1193,14 @@ class VotingClassifier(BaseTransformer):
1170
1193
  api_calls=[Session.call],
1171
1194
  custom_tags=dict([("autogen", True)]),
1172
1195
  )
1173
- score = session.call(
1174
- score_sproc_name,
1196
+ score = score_wrapper_sproc(
1197
+ session,
1175
1198
  query,
1176
1199
  stage_score_file_name,
1177
1200
  identifier.get_unescaped_names(self.input_cols),
1178
1201
  identifier.get_unescaped_names(self.label_cols),
1179
1202
  identifier.get_unescaped_names(self.sample_weight_col),
1180
- statement_params=statement_params,
1203
+ statement_params,
1181
1204
  )
1182
1205
 
1183
1206
  cleanup_temp_files([local_score_file_name])
@@ -1195,18 +1218,20 @@ class VotingClassifier(BaseTransformer):
1195
1218
  if self._sklearn_object._estimator_type == 'classifier':
1196
1219
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1197
1220
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1198
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1221
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1222
+ ([] if self._drop_input_cols else inputs) + outputs)
1199
1223
  # For regressor, the type of predict is float64
1200
1224
  elif self._sklearn_object._estimator_type == 'regressor':
1201
1225
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1202
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1203
-
1226
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1227
+ ([] if self._drop_input_cols else inputs) + outputs)
1204
1228
  for prob_func in PROB_FUNCTIONS:
1205
1229
  if hasattr(self, prob_func):
1206
1230
  output_cols_prefix: str = f"{prob_func}_"
1207
1231
  output_column_names = self._get_output_column_names(output_cols_prefix)
1208
1232
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1209
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1233
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1234
+ ([] if self._drop_input_cols else inputs) + outputs)
1210
1235
 
1211
1236
  @property
1212
1237
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -203,7 +205,6 @@ class VotingRegressor(BaseTransformer):
203
205
  sample_weight_col: Optional[str] = None,
204
206
  ) -> None:
205
207
  super().__init__()
206
- self.id = str(uuid4()).replace("-", "_").upper()
207
208
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
208
209
  deps = deps | _gather_dependencies(estimators)
209
210
  self._deps = list(deps)
@@ -226,6 +227,15 @@ class VotingRegressor(BaseTransformer):
226
227
  self.set_drop_input_cols(drop_input_cols)
227
228
  self.set_sample_weight_col(sample_weight_col)
228
229
 
230
+ def _get_rand_id(self) -> str:
231
+ """
232
+ Generate random id to be used in sproc and stage names.
233
+
234
+ Returns:
235
+ Random id string usable in sproc, table, and stage names.
236
+ """
237
+ return str(uuid4()).replace("-", "_").upper()
238
+
229
239
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
230
240
  """
231
241
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -304,7 +314,7 @@ class VotingRegressor(BaseTransformer):
304
314
  cp.dump(self._sklearn_object, local_transform_file)
305
315
 
306
316
  # Create temp stage to run fit.
307
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
317
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
308
318
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
309
319
  SqlResultValidator(
310
320
  session=session,
@@ -317,11 +327,12 @@ class VotingRegressor(BaseTransformer):
317
327
  expected_value=f"Stage area {transform_stage_name} successfully created."
318
328
  ).validate()
319
329
 
320
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
330
+ # Use posixpath to construct stage paths
331
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
332
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
321
333
  local_result_file_name = get_temp_file_path()
322
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
323
334
 
324
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
335
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
325
336
  statement_params = telemetry.get_function_usage_statement_params(
326
337
  project=_PROJECT,
327
338
  subproject=_SUBPROJECT,
@@ -347,6 +358,7 @@ class VotingRegressor(BaseTransformer):
347
358
  replace=True,
348
359
  session=session,
349
360
  statement_params=statement_params,
361
+ anonymous=True
350
362
  )
351
363
  def fit_wrapper_sproc(
352
364
  session: Session,
@@ -355,7 +367,8 @@ class VotingRegressor(BaseTransformer):
355
367
  stage_result_file_name: str,
356
368
  input_cols: List[str],
357
369
  label_cols: List[str],
358
- sample_weight_col: Optional[str]
370
+ sample_weight_col: Optional[str],
371
+ statement_params: Dict[str, str]
359
372
  ) -> str:
360
373
  import cloudpickle as cp
361
374
  import numpy as np
@@ -422,15 +435,15 @@ class VotingRegressor(BaseTransformer):
422
435
  api_calls=[Session.call],
423
436
  custom_tags=dict([("autogen", True)]),
424
437
  )
425
- sproc_export_file_name = session.call(
426
- fit_sproc_name,
438
+ sproc_export_file_name = fit_wrapper_sproc(
439
+ session,
427
440
  query,
428
441
  stage_transform_file_name,
429
442
  stage_result_file_name,
430
443
  identifier.get_unescaped_names(self.input_cols),
431
444
  identifier.get_unescaped_names(self.label_cols),
432
445
  identifier.get_unescaped_names(self.sample_weight_col),
433
- statement_params=statement_params,
446
+ statement_params,
434
447
  )
435
448
 
436
449
  if "|" in sproc_export_file_name:
@@ -440,7 +453,7 @@ class VotingRegressor(BaseTransformer):
440
453
  print("\n".join(fields[1:]))
441
454
 
442
455
  session.file.get(
443
- os.path.join(stage_result_file_name, sproc_export_file_name),
456
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
444
457
  local_result_file_name,
445
458
  statement_params=statement_params
446
459
  )
@@ -486,7 +499,7 @@ class VotingRegressor(BaseTransformer):
486
499
 
487
500
  # Register vectorized UDF for batch inference
488
501
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
489
- safe_id=self.id, method=inference_method)
502
+ safe_id=self._get_rand_id(), method=inference_method)
490
503
 
491
504
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
492
505
  # will try to pickle all of self which fails.
@@ -578,7 +591,7 @@ class VotingRegressor(BaseTransformer):
578
591
  return transformed_pandas_df.to_dict("records")
579
592
 
580
593
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
581
- safe_id=self.id
594
+ safe_id=self._get_rand_id()
582
595
  )
583
596
 
584
597
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -745,11 +758,18 @@ class VotingRegressor(BaseTransformer):
745
758
  Transformed dataset.
746
759
  """
747
760
  if isinstance(dataset, DataFrame):
761
+ expected_type_inferred = "float"
762
+ # when it is classifier, infer the datatype from label columns
763
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
764
+ expected_type_inferred = convert_sp_to_sf_type(
765
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
766
+ )
767
+
748
768
  output_df = self._batch_inference(
749
769
  dataset=dataset,
750
770
  inference_method="predict",
751
771
  expected_output_cols_list=self.output_cols,
752
- expected_output_cols_type="float",
772
+ expected_output_cols_type=expected_type_inferred,
753
773
  )
754
774
  elif isinstance(dataset, pd.DataFrame):
755
775
  output_df = self._sklearn_inference(
@@ -822,10 +842,10 @@ class VotingRegressor(BaseTransformer):
822
842
 
823
843
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
824
844
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
825
- Returns an empty list if current object is not a classifier or not yet fitted.
845
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
826
846
  """
827
847
  if getattr(self._sklearn_object, "classes_", None) is None:
828
- return []
848
+ return [output_cols_prefix]
829
849
 
830
850
  classes = self._sklearn_object.classes_
831
851
  if isinstance(classes, numpy.ndarray):
@@ -1050,7 +1070,7 @@ class VotingRegressor(BaseTransformer):
1050
1070
  cp.dump(self._sklearn_object, local_score_file)
1051
1071
 
1052
1072
  # Create temp stage to run score.
1053
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1073
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1054
1074
  session = dataset._session
1055
1075
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1056
1076
  SqlResultValidator(
@@ -1064,8 +1084,9 @@ class VotingRegressor(BaseTransformer):
1064
1084
  expected_value=f"Stage area {score_stage_name} successfully created."
1065
1085
  ).validate()
1066
1086
 
1067
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1068
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1087
+ # Use posixpath to construct stage paths
1088
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1089
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1069
1090
  statement_params = telemetry.get_function_usage_statement_params(
1070
1091
  project=_PROJECT,
1071
1092
  subproject=_SUBPROJECT,
@@ -1091,6 +1112,7 @@ class VotingRegressor(BaseTransformer):
1091
1112
  replace=True,
1092
1113
  session=session,
1093
1114
  statement_params=statement_params,
1115
+ anonymous=True
1094
1116
  )
1095
1117
  def score_wrapper_sproc(
1096
1118
  session: Session,
@@ -1098,7 +1120,8 @@ class VotingRegressor(BaseTransformer):
1098
1120
  stage_score_file_name: str,
1099
1121
  input_cols: List[str],
1100
1122
  label_cols: List[str],
1101
- sample_weight_col: Optional[str]
1123
+ sample_weight_col: Optional[str],
1124
+ statement_params: Dict[str, str]
1102
1125
  ) -> float:
1103
1126
  import cloudpickle as cp
1104
1127
  import numpy as np
@@ -1148,14 +1171,14 @@ class VotingRegressor(BaseTransformer):
1148
1171
  api_calls=[Session.call],
1149
1172
  custom_tags=dict([("autogen", True)]),
1150
1173
  )
1151
- score = session.call(
1152
- score_sproc_name,
1174
+ score = score_wrapper_sproc(
1175
+ session,
1153
1176
  query,
1154
1177
  stage_score_file_name,
1155
1178
  identifier.get_unescaped_names(self.input_cols),
1156
1179
  identifier.get_unescaped_names(self.label_cols),
1157
1180
  identifier.get_unescaped_names(self.sample_weight_col),
1158
- statement_params=statement_params,
1181
+ statement_params,
1159
1182
  )
1160
1183
 
1161
1184
  cleanup_temp_files([local_score_file_name])
@@ -1173,18 +1196,20 @@ class VotingRegressor(BaseTransformer):
1173
1196
  if self._sklearn_object._estimator_type == 'classifier':
1174
1197
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1175
1198
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1176
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1199
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1200
+ ([] if self._drop_input_cols else inputs) + outputs)
1177
1201
  # For regressor, the type of predict is float64
1178
1202
  elif self._sklearn_object._estimator_type == 'regressor':
1179
1203
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1180
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1181
-
1204
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1205
+ ([] if self._drop_input_cols else inputs) + outputs)
1182
1206
  for prob_func in PROB_FUNCTIONS:
1183
1207
  if hasattr(self, prob_func):
1184
1208
  output_cols_prefix: str = f"{prob_func}_"
1185
1209
  output_column_names = self._get_output_column_names(output_cols_prefix)
1186
1210
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1187
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1211
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1212
+ ([] if self._drop_input_cols else inputs) + outputs)
1188
1213
 
1189
1214
  @property
1190
1215
  def model_signatures(self) -> Dict[str, ModelSignature]: