snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -208,7 +210,6 @@ class PolynomialFeatures(BaseTransformer):
208
210
  sample_weight_col: Optional[str] = None,
209
211
  ) -> None:
210
212
  super().__init__()
211
- self.id = str(uuid4()).replace("-", "_").upper()
212
213
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
213
214
 
214
215
  self._deps = list(deps)
@@ -231,6 +232,15 @@ class PolynomialFeatures(BaseTransformer):
231
232
  self.set_drop_input_cols(drop_input_cols)
232
233
  self.set_sample_weight_col(sample_weight_col)
233
234
 
235
+ def _get_rand_id(self) -> str:
236
+ """
237
+ Generate random id to be used in sproc and stage names.
238
+
239
+ Returns:
240
+ Random id string usable in sproc, table, and stage names.
241
+ """
242
+ return str(uuid4()).replace("-", "_").upper()
243
+
234
244
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
235
245
  """
236
246
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -309,7 +319,7 @@ class PolynomialFeatures(BaseTransformer):
309
319
  cp.dump(self._sklearn_object, local_transform_file)
310
320
 
311
321
  # Create temp stage to run fit.
312
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
322
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
313
323
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
314
324
  SqlResultValidator(
315
325
  session=session,
@@ -322,11 +332,12 @@ class PolynomialFeatures(BaseTransformer):
322
332
  expected_value=f"Stage area {transform_stage_name} successfully created."
323
333
  ).validate()
324
334
 
325
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
335
+ # Use posixpath to construct stage paths
336
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
337
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
326
338
  local_result_file_name = get_temp_file_path()
327
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
328
339
 
329
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
340
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
330
341
  statement_params = telemetry.get_function_usage_statement_params(
331
342
  project=_PROJECT,
332
343
  subproject=_SUBPROJECT,
@@ -352,6 +363,7 @@ class PolynomialFeatures(BaseTransformer):
352
363
  replace=True,
353
364
  session=session,
354
365
  statement_params=statement_params,
366
+ anonymous=True
355
367
  )
356
368
  def fit_wrapper_sproc(
357
369
  session: Session,
@@ -360,7 +372,8 @@ class PolynomialFeatures(BaseTransformer):
360
372
  stage_result_file_name: str,
361
373
  input_cols: List[str],
362
374
  label_cols: List[str],
363
- sample_weight_col: Optional[str]
375
+ sample_weight_col: Optional[str],
376
+ statement_params: Dict[str, str]
364
377
  ) -> str:
365
378
  import cloudpickle as cp
366
379
  import numpy as np
@@ -427,15 +440,15 @@ class PolynomialFeatures(BaseTransformer):
427
440
  api_calls=[Session.call],
428
441
  custom_tags=dict([("autogen", True)]),
429
442
  )
430
- sproc_export_file_name = session.call(
431
- fit_sproc_name,
443
+ sproc_export_file_name = fit_wrapper_sproc(
444
+ session,
432
445
  query,
433
446
  stage_transform_file_name,
434
447
  stage_result_file_name,
435
448
  identifier.get_unescaped_names(self.input_cols),
436
449
  identifier.get_unescaped_names(self.label_cols),
437
450
  identifier.get_unescaped_names(self.sample_weight_col),
438
- statement_params=statement_params,
451
+ statement_params,
439
452
  )
440
453
 
441
454
  if "|" in sproc_export_file_name:
@@ -445,7 +458,7 @@ class PolynomialFeatures(BaseTransformer):
445
458
  print("\n".join(fields[1:]))
446
459
 
447
460
  session.file.get(
448
- os.path.join(stage_result_file_name, sproc_export_file_name),
461
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
449
462
  local_result_file_name,
450
463
  statement_params=statement_params
451
464
  )
@@ -491,7 +504,7 @@ class PolynomialFeatures(BaseTransformer):
491
504
 
492
505
  # Register vectorized UDF for batch inference
493
506
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
494
- safe_id=self.id, method=inference_method)
507
+ safe_id=self._get_rand_id(), method=inference_method)
495
508
 
496
509
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
497
510
  # will try to pickle all of self which fails.
@@ -583,7 +596,7 @@ class PolynomialFeatures(BaseTransformer):
583
596
  return transformed_pandas_df.to_dict("records")
584
597
 
585
598
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
586
- safe_id=self.id
599
+ safe_id=self._get_rand_id()
587
600
  )
588
601
 
589
602
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -748,11 +761,18 @@ class PolynomialFeatures(BaseTransformer):
748
761
  Transformed dataset.
749
762
  """
750
763
  if isinstance(dataset, DataFrame):
764
+ expected_type_inferred = ""
765
+ # when it is classifier, infer the datatype from label columns
766
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
767
+ expected_type_inferred = convert_sp_to_sf_type(
768
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
769
+ )
770
+
751
771
  output_df = self._batch_inference(
752
772
  dataset=dataset,
753
773
  inference_method="predict",
754
774
  expected_output_cols_list=self.output_cols,
755
- expected_output_cols_type="",
775
+ expected_output_cols_type=expected_type_inferred,
756
776
  )
757
777
  elif isinstance(dataset, pd.DataFrame):
758
778
  output_df = self._sklearn_inference(
@@ -825,10 +845,10 @@ class PolynomialFeatures(BaseTransformer):
825
845
 
826
846
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
827
847
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
828
- Returns an empty list if current object is not a classifier or not yet fitted.
848
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
829
849
  """
830
850
  if getattr(self._sklearn_object, "classes_", None) is None:
831
- return []
851
+ return [output_cols_prefix]
832
852
 
833
853
  classes = self._sklearn_object.classes_
834
854
  if isinstance(classes, numpy.ndarray):
@@ -1053,7 +1073,7 @@ class PolynomialFeatures(BaseTransformer):
1053
1073
  cp.dump(self._sklearn_object, local_score_file)
1054
1074
 
1055
1075
  # Create temp stage to run score.
1056
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1076
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1057
1077
  session = dataset._session
1058
1078
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1059
1079
  SqlResultValidator(
@@ -1067,8 +1087,9 @@ class PolynomialFeatures(BaseTransformer):
1067
1087
  expected_value=f"Stage area {score_stage_name} successfully created."
1068
1088
  ).validate()
1069
1089
 
1070
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1071
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1090
+ # Use posixpath to construct stage paths
1091
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1092
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1072
1093
  statement_params = telemetry.get_function_usage_statement_params(
1073
1094
  project=_PROJECT,
1074
1095
  subproject=_SUBPROJECT,
@@ -1094,6 +1115,7 @@ class PolynomialFeatures(BaseTransformer):
1094
1115
  replace=True,
1095
1116
  session=session,
1096
1117
  statement_params=statement_params,
1118
+ anonymous=True
1097
1119
  )
1098
1120
  def score_wrapper_sproc(
1099
1121
  session: Session,
@@ -1101,7 +1123,8 @@ class PolynomialFeatures(BaseTransformer):
1101
1123
  stage_score_file_name: str,
1102
1124
  input_cols: List[str],
1103
1125
  label_cols: List[str],
1104
- sample_weight_col: Optional[str]
1126
+ sample_weight_col: Optional[str],
1127
+ statement_params: Dict[str, str]
1105
1128
  ) -> float:
1106
1129
  import cloudpickle as cp
1107
1130
  import numpy as np
@@ -1151,14 +1174,14 @@ class PolynomialFeatures(BaseTransformer):
1151
1174
  api_calls=[Session.call],
1152
1175
  custom_tags=dict([("autogen", True)]),
1153
1176
  )
1154
- score = session.call(
1155
- score_sproc_name,
1177
+ score = score_wrapper_sproc(
1178
+ session,
1156
1179
  query,
1157
1180
  stage_score_file_name,
1158
1181
  identifier.get_unescaped_names(self.input_cols),
1159
1182
  identifier.get_unescaped_names(self.label_cols),
1160
1183
  identifier.get_unescaped_names(self.sample_weight_col),
1161
- statement_params=statement_params,
1184
+ statement_params,
1162
1185
  )
1163
1186
 
1164
1187
  cleanup_temp_files([local_score_file_name])
@@ -1176,18 +1199,20 @@ class PolynomialFeatures(BaseTransformer):
1176
1199
  if self._sklearn_object._estimator_type == 'classifier':
1177
1200
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1178
1201
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1179
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1202
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1203
+ ([] if self._drop_input_cols else inputs) + outputs)
1180
1204
  # For regressor, the type of predict is float64
1181
1205
  elif self._sklearn_object._estimator_type == 'regressor':
1182
1206
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1183
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1184
-
1207
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1208
+ ([] if self._drop_input_cols else inputs) + outputs)
1185
1209
  for prob_func in PROB_FUNCTIONS:
1186
1210
  if hasattr(self, prob_func):
1187
1211
  output_cols_prefix: str = f"{prob_func}_"
1188
1212
  output_column_names = self._get_output_column_names(output_cols_prefix)
1189
1213
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1190
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1214
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1215
+ ([] if self._drop_input_cols else inputs) + outputs)
1191
1216
 
1192
1217
  @property
1193
1218
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -210,7 +212,6 @@ class LabelPropagation(BaseTransformer):
210
212
  sample_weight_col: Optional[str] = None,
211
213
  ) -> None:
212
214
  super().__init__()
213
- self.id = str(uuid4()).replace("-", "_").upper()
214
215
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
215
216
 
216
217
  self._deps = list(deps)
@@ -235,6 +236,15 @@ class LabelPropagation(BaseTransformer):
235
236
  self.set_drop_input_cols(drop_input_cols)
236
237
  self.set_sample_weight_col(sample_weight_col)
237
238
 
239
+ def _get_rand_id(self) -> str:
240
+ """
241
+ Generate random id to be used in sproc and stage names.
242
+
243
+ Returns:
244
+ Random id string usable in sproc, table, and stage names.
245
+ """
246
+ return str(uuid4()).replace("-", "_").upper()
247
+
238
248
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
239
249
  """
240
250
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -313,7 +323,7 @@ class LabelPropagation(BaseTransformer):
313
323
  cp.dump(self._sklearn_object, local_transform_file)
314
324
 
315
325
  # Create temp stage to run fit.
316
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
326
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
317
327
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
318
328
  SqlResultValidator(
319
329
  session=session,
@@ -326,11 +336,12 @@ class LabelPropagation(BaseTransformer):
326
336
  expected_value=f"Stage area {transform_stage_name} successfully created."
327
337
  ).validate()
328
338
 
329
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
339
+ # Use posixpath to construct stage paths
340
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
341
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
330
342
  local_result_file_name = get_temp_file_path()
331
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
332
343
 
333
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
344
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
334
345
  statement_params = telemetry.get_function_usage_statement_params(
335
346
  project=_PROJECT,
336
347
  subproject=_SUBPROJECT,
@@ -356,6 +367,7 @@ class LabelPropagation(BaseTransformer):
356
367
  replace=True,
357
368
  session=session,
358
369
  statement_params=statement_params,
370
+ anonymous=True
359
371
  )
360
372
  def fit_wrapper_sproc(
361
373
  session: Session,
@@ -364,7 +376,8 @@ class LabelPropagation(BaseTransformer):
364
376
  stage_result_file_name: str,
365
377
  input_cols: List[str],
366
378
  label_cols: List[str],
367
- sample_weight_col: Optional[str]
379
+ sample_weight_col: Optional[str],
380
+ statement_params: Dict[str, str]
368
381
  ) -> str:
369
382
  import cloudpickle as cp
370
383
  import numpy as np
@@ -431,15 +444,15 @@ class LabelPropagation(BaseTransformer):
431
444
  api_calls=[Session.call],
432
445
  custom_tags=dict([("autogen", True)]),
433
446
  )
434
- sproc_export_file_name = session.call(
435
- fit_sproc_name,
447
+ sproc_export_file_name = fit_wrapper_sproc(
448
+ session,
436
449
  query,
437
450
  stage_transform_file_name,
438
451
  stage_result_file_name,
439
452
  identifier.get_unescaped_names(self.input_cols),
440
453
  identifier.get_unescaped_names(self.label_cols),
441
454
  identifier.get_unescaped_names(self.sample_weight_col),
442
- statement_params=statement_params,
455
+ statement_params,
443
456
  )
444
457
 
445
458
  if "|" in sproc_export_file_name:
@@ -449,7 +462,7 @@ class LabelPropagation(BaseTransformer):
449
462
  print("\n".join(fields[1:]))
450
463
 
451
464
  session.file.get(
452
- os.path.join(stage_result_file_name, sproc_export_file_name),
465
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
453
466
  local_result_file_name,
454
467
  statement_params=statement_params
455
468
  )
@@ -495,7 +508,7 @@ class LabelPropagation(BaseTransformer):
495
508
 
496
509
  # Register vectorized UDF for batch inference
497
510
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
498
- safe_id=self.id, method=inference_method)
511
+ safe_id=self._get_rand_id(), method=inference_method)
499
512
 
500
513
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
501
514
  # will try to pickle all of self which fails.
@@ -587,7 +600,7 @@ class LabelPropagation(BaseTransformer):
587
600
  return transformed_pandas_df.to_dict("records")
588
601
 
589
602
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
590
- safe_id=self.id
603
+ safe_id=self._get_rand_id()
591
604
  )
592
605
 
593
606
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -754,11 +767,18 @@ class LabelPropagation(BaseTransformer):
754
767
  Transformed dataset.
755
768
  """
756
769
  if isinstance(dataset, DataFrame):
770
+ expected_type_inferred = ""
771
+ # when it is classifier, infer the datatype from label columns
772
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
773
+ expected_type_inferred = convert_sp_to_sf_type(
774
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
775
+ )
776
+
757
777
  output_df = self._batch_inference(
758
778
  dataset=dataset,
759
779
  inference_method="predict",
760
780
  expected_output_cols_list=self.output_cols,
761
- expected_output_cols_type="",
781
+ expected_output_cols_type=expected_type_inferred,
762
782
  )
763
783
  elif isinstance(dataset, pd.DataFrame):
764
784
  output_df = self._sklearn_inference(
@@ -829,10 +849,10 @@ class LabelPropagation(BaseTransformer):
829
849
 
830
850
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
831
851
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
832
- Returns an empty list if current object is not a classifier or not yet fitted.
852
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
833
853
  """
834
854
  if getattr(self._sklearn_object, "classes_", None) is None:
835
- return []
855
+ return [output_cols_prefix]
836
856
 
837
857
  classes = self._sklearn_object.classes_
838
858
  if isinstance(classes, numpy.ndarray):
@@ -1061,7 +1081,7 @@ class LabelPropagation(BaseTransformer):
1061
1081
  cp.dump(self._sklearn_object, local_score_file)
1062
1082
 
1063
1083
  # Create temp stage to run score.
1064
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1084
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1065
1085
  session = dataset._session
1066
1086
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1067
1087
  SqlResultValidator(
@@ -1075,8 +1095,9 @@ class LabelPropagation(BaseTransformer):
1075
1095
  expected_value=f"Stage area {score_stage_name} successfully created."
1076
1096
  ).validate()
1077
1097
 
1078
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1079
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1098
+ # Use posixpath to construct stage paths
1099
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1100
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1080
1101
  statement_params = telemetry.get_function_usage_statement_params(
1081
1102
  project=_PROJECT,
1082
1103
  subproject=_SUBPROJECT,
@@ -1102,6 +1123,7 @@ class LabelPropagation(BaseTransformer):
1102
1123
  replace=True,
1103
1124
  session=session,
1104
1125
  statement_params=statement_params,
1126
+ anonymous=True
1105
1127
  )
1106
1128
  def score_wrapper_sproc(
1107
1129
  session: Session,
@@ -1109,7 +1131,8 @@ class LabelPropagation(BaseTransformer):
1109
1131
  stage_score_file_name: str,
1110
1132
  input_cols: List[str],
1111
1133
  label_cols: List[str],
1112
- sample_weight_col: Optional[str]
1134
+ sample_weight_col: Optional[str],
1135
+ statement_params: Dict[str, str]
1113
1136
  ) -> float:
1114
1137
  import cloudpickle as cp
1115
1138
  import numpy as np
@@ -1159,14 +1182,14 @@ class LabelPropagation(BaseTransformer):
1159
1182
  api_calls=[Session.call],
1160
1183
  custom_tags=dict([("autogen", True)]),
1161
1184
  )
1162
- score = session.call(
1163
- score_sproc_name,
1185
+ score = score_wrapper_sproc(
1186
+ session,
1164
1187
  query,
1165
1188
  stage_score_file_name,
1166
1189
  identifier.get_unescaped_names(self.input_cols),
1167
1190
  identifier.get_unescaped_names(self.label_cols),
1168
1191
  identifier.get_unescaped_names(self.sample_weight_col),
1169
- statement_params=statement_params,
1192
+ statement_params,
1170
1193
  )
1171
1194
 
1172
1195
  cleanup_temp_files([local_score_file_name])
@@ -1184,18 +1207,20 @@ class LabelPropagation(BaseTransformer):
1184
1207
  if self._sklearn_object._estimator_type == 'classifier':
1185
1208
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1186
1209
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1187
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1210
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1211
+ ([] if self._drop_input_cols else inputs) + outputs)
1188
1212
  # For regressor, the type of predict is float64
1189
1213
  elif self._sklearn_object._estimator_type == 'regressor':
1190
1214
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1191
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1192
-
1215
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1216
+ ([] if self._drop_input_cols else inputs) + outputs)
1193
1217
  for prob_func in PROB_FUNCTIONS:
1194
1218
  if hasattr(self, prob_func):
1195
1219
  output_cols_prefix: str = f"{prob_func}_"
1196
1220
  output_column_names = self._get_output_column_names(output_cols_prefix)
1197
1221
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1198
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1222
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1223
+ ([] if self._drop_input_cols else inputs) + outputs)
1199
1224
 
1200
1225
  @property
1201
1226
  def model_signatures(self) -> Dict[str, ModelSignature]: