snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -218,7 +220,6 @@ class LabelSpreading(BaseTransformer):
218
220
  sample_weight_col: Optional[str] = None,
219
221
  ) -> None:
220
222
  super().__init__()
221
- self.id = str(uuid4()).replace("-", "_").upper()
222
223
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
223
224
 
224
225
  self._deps = list(deps)
@@ -244,6 +245,15 @@ class LabelSpreading(BaseTransformer):
244
245
  self.set_drop_input_cols(drop_input_cols)
245
246
  self.set_sample_weight_col(sample_weight_col)
246
247
 
248
+ def _get_rand_id(self) -> str:
249
+ """
250
+ Generate random id to be used in sproc and stage names.
251
+
252
+ Returns:
253
+ Random id string usable in sproc, table, and stage names.
254
+ """
255
+ return str(uuid4()).replace("-", "_").upper()
256
+
247
257
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
248
258
  """
249
259
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -322,7 +332,7 @@ class LabelSpreading(BaseTransformer):
322
332
  cp.dump(self._sklearn_object, local_transform_file)
323
333
 
324
334
  # Create temp stage to run fit.
325
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
335
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
326
336
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
327
337
  SqlResultValidator(
328
338
  session=session,
@@ -335,11 +345,12 @@ class LabelSpreading(BaseTransformer):
335
345
  expected_value=f"Stage area {transform_stage_name} successfully created."
336
346
  ).validate()
337
347
 
338
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
348
+ # Use posixpath to construct stage paths
349
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
350
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
339
351
  local_result_file_name = get_temp_file_path()
340
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
341
352
 
342
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
353
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
343
354
  statement_params = telemetry.get_function_usage_statement_params(
344
355
  project=_PROJECT,
345
356
  subproject=_SUBPROJECT,
@@ -365,6 +376,7 @@ class LabelSpreading(BaseTransformer):
365
376
  replace=True,
366
377
  session=session,
367
378
  statement_params=statement_params,
379
+ anonymous=True
368
380
  )
369
381
  def fit_wrapper_sproc(
370
382
  session: Session,
@@ -373,7 +385,8 @@ class LabelSpreading(BaseTransformer):
373
385
  stage_result_file_name: str,
374
386
  input_cols: List[str],
375
387
  label_cols: List[str],
376
- sample_weight_col: Optional[str]
388
+ sample_weight_col: Optional[str],
389
+ statement_params: Dict[str, str]
377
390
  ) -> str:
378
391
  import cloudpickle as cp
379
392
  import numpy as np
@@ -440,15 +453,15 @@ class LabelSpreading(BaseTransformer):
440
453
  api_calls=[Session.call],
441
454
  custom_tags=dict([("autogen", True)]),
442
455
  )
443
- sproc_export_file_name = session.call(
444
- fit_sproc_name,
456
+ sproc_export_file_name = fit_wrapper_sproc(
457
+ session,
445
458
  query,
446
459
  stage_transform_file_name,
447
460
  stage_result_file_name,
448
461
  identifier.get_unescaped_names(self.input_cols),
449
462
  identifier.get_unescaped_names(self.label_cols),
450
463
  identifier.get_unescaped_names(self.sample_weight_col),
451
- statement_params=statement_params,
464
+ statement_params,
452
465
  )
453
466
 
454
467
  if "|" in sproc_export_file_name:
@@ -458,7 +471,7 @@ class LabelSpreading(BaseTransformer):
458
471
  print("\n".join(fields[1:]))
459
472
 
460
473
  session.file.get(
461
- os.path.join(stage_result_file_name, sproc_export_file_name),
474
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
462
475
  local_result_file_name,
463
476
  statement_params=statement_params
464
477
  )
@@ -504,7 +517,7 @@ class LabelSpreading(BaseTransformer):
504
517
 
505
518
  # Register vectorized UDF for batch inference
506
519
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
507
- safe_id=self.id, method=inference_method)
520
+ safe_id=self._get_rand_id(), method=inference_method)
508
521
 
509
522
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
510
523
  # will try to pickle all of self which fails.
@@ -596,7 +609,7 @@ class LabelSpreading(BaseTransformer):
596
609
  return transformed_pandas_df.to_dict("records")
597
610
 
598
611
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
599
- safe_id=self.id
612
+ safe_id=self._get_rand_id()
600
613
  )
601
614
 
602
615
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -763,11 +776,18 @@ class LabelSpreading(BaseTransformer):
763
776
  Transformed dataset.
764
777
  """
765
778
  if isinstance(dataset, DataFrame):
779
+ expected_type_inferred = ""
780
+ # when it is classifier, infer the datatype from label columns
781
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
782
+ expected_type_inferred = convert_sp_to_sf_type(
783
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
784
+ )
785
+
766
786
  output_df = self._batch_inference(
767
787
  dataset=dataset,
768
788
  inference_method="predict",
769
789
  expected_output_cols_list=self.output_cols,
770
- expected_output_cols_type="",
790
+ expected_output_cols_type=expected_type_inferred,
771
791
  )
772
792
  elif isinstance(dataset, pd.DataFrame):
773
793
  output_df = self._sklearn_inference(
@@ -838,10 +858,10 @@ class LabelSpreading(BaseTransformer):
838
858
 
839
859
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
840
860
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
841
- Returns an empty list if current object is not a classifier or not yet fitted.
861
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
842
862
  """
843
863
  if getattr(self._sklearn_object, "classes_", None) is None:
844
- return []
864
+ return [output_cols_prefix]
845
865
 
846
866
  classes = self._sklearn_object.classes_
847
867
  if isinstance(classes, numpy.ndarray):
@@ -1070,7 +1090,7 @@ class LabelSpreading(BaseTransformer):
1070
1090
  cp.dump(self._sklearn_object, local_score_file)
1071
1091
 
1072
1092
  # Create temp stage to run score.
1073
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1093
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1074
1094
  session = dataset._session
1075
1095
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1076
1096
  SqlResultValidator(
@@ -1084,8 +1104,9 @@ class LabelSpreading(BaseTransformer):
1084
1104
  expected_value=f"Stage area {score_stage_name} successfully created."
1085
1105
  ).validate()
1086
1106
 
1087
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1088
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1107
+ # Use posixpath to construct stage paths
1108
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1109
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1089
1110
  statement_params = telemetry.get_function_usage_statement_params(
1090
1111
  project=_PROJECT,
1091
1112
  subproject=_SUBPROJECT,
@@ -1111,6 +1132,7 @@ class LabelSpreading(BaseTransformer):
1111
1132
  replace=True,
1112
1133
  session=session,
1113
1134
  statement_params=statement_params,
1135
+ anonymous=True
1114
1136
  )
1115
1137
  def score_wrapper_sproc(
1116
1138
  session: Session,
@@ -1118,7 +1140,8 @@ class LabelSpreading(BaseTransformer):
1118
1140
  stage_score_file_name: str,
1119
1141
  input_cols: List[str],
1120
1142
  label_cols: List[str],
1121
- sample_weight_col: Optional[str]
1143
+ sample_weight_col: Optional[str],
1144
+ statement_params: Dict[str, str]
1122
1145
  ) -> float:
1123
1146
  import cloudpickle as cp
1124
1147
  import numpy as np
@@ -1168,14 +1191,14 @@ class LabelSpreading(BaseTransformer):
1168
1191
  api_calls=[Session.call],
1169
1192
  custom_tags=dict([("autogen", True)]),
1170
1193
  )
1171
- score = session.call(
1172
- score_sproc_name,
1194
+ score = score_wrapper_sproc(
1195
+ session,
1173
1196
  query,
1174
1197
  stage_score_file_name,
1175
1198
  identifier.get_unescaped_names(self.input_cols),
1176
1199
  identifier.get_unescaped_names(self.label_cols),
1177
1200
  identifier.get_unescaped_names(self.sample_weight_col),
1178
- statement_params=statement_params,
1201
+ statement_params,
1179
1202
  )
1180
1203
 
1181
1204
  cleanup_temp_files([local_score_file_name])
@@ -1193,18 +1216,20 @@ class LabelSpreading(BaseTransformer):
1193
1216
  if self._sklearn_object._estimator_type == 'classifier':
1194
1217
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1195
1218
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1196
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1219
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1220
+ ([] if self._drop_input_cols else inputs) + outputs)
1197
1221
  # For regressor, the type of predict is float64
1198
1222
  elif self._sklearn_object._estimator_type == 'regressor':
1199
1223
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1200
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1201
-
1224
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1225
+ ([] if self._drop_input_cols else inputs) + outputs)
1202
1226
  for prob_func in PROB_FUNCTIONS:
1203
1227
  if hasattr(self, prob_func):
1204
1228
  output_cols_prefix: str = f"{prob_func}_"
1205
1229
  output_column_names = self._get_output_column_names(output_cols_prefix)
1206
1230
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1207
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1231
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1232
+ ([] if self._drop_input_cols else inputs) + outputs)
1208
1233
 
1209
1234
  @property
1210
1235
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -264,7 +266,6 @@ class LinearSVC(BaseTransformer):
264
266
  sample_weight_col: Optional[str] = None,
265
267
  ) -> None:
266
268
  super().__init__()
267
- self.id = str(uuid4()).replace("-", "_").upper()
268
269
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
269
270
 
270
271
  self._deps = list(deps)
@@ -295,6 +296,15 @@ class LinearSVC(BaseTransformer):
295
296
  self.set_drop_input_cols(drop_input_cols)
296
297
  self.set_sample_weight_col(sample_weight_col)
297
298
 
299
+ def _get_rand_id(self) -> str:
300
+ """
301
+ Generate random id to be used in sproc and stage names.
302
+
303
+ Returns:
304
+ Random id string usable in sproc, table, and stage names.
305
+ """
306
+ return str(uuid4()).replace("-", "_").upper()
307
+
298
308
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
299
309
  """
300
310
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -373,7 +383,7 @@ class LinearSVC(BaseTransformer):
373
383
  cp.dump(self._sklearn_object, local_transform_file)
374
384
 
375
385
  # Create temp stage to run fit.
376
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
386
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
377
387
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
378
388
  SqlResultValidator(
379
389
  session=session,
@@ -386,11 +396,12 @@ class LinearSVC(BaseTransformer):
386
396
  expected_value=f"Stage area {transform_stage_name} successfully created."
387
397
  ).validate()
388
398
 
389
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
399
+ # Use posixpath to construct stage paths
400
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
401
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
390
402
  local_result_file_name = get_temp_file_path()
391
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
392
403
 
393
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
404
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
394
405
  statement_params = telemetry.get_function_usage_statement_params(
395
406
  project=_PROJECT,
396
407
  subproject=_SUBPROJECT,
@@ -416,6 +427,7 @@ class LinearSVC(BaseTransformer):
416
427
  replace=True,
417
428
  session=session,
418
429
  statement_params=statement_params,
430
+ anonymous=True
419
431
  )
420
432
  def fit_wrapper_sproc(
421
433
  session: Session,
@@ -424,7 +436,8 @@ class LinearSVC(BaseTransformer):
424
436
  stage_result_file_name: str,
425
437
  input_cols: List[str],
426
438
  label_cols: List[str],
427
- sample_weight_col: Optional[str]
439
+ sample_weight_col: Optional[str],
440
+ statement_params: Dict[str, str]
428
441
  ) -> str:
429
442
  import cloudpickle as cp
430
443
  import numpy as np
@@ -491,15 +504,15 @@ class LinearSVC(BaseTransformer):
491
504
  api_calls=[Session.call],
492
505
  custom_tags=dict([("autogen", True)]),
493
506
  )
494
- sproc_export_file_name = session.call(
495
- fit_sproc_name,
507
+ sproc_export_file_name = fit_wrapper_sproc(
508
+ session,
496
509
  query,
497
510
  stage_transform_file_name,
498
511
  stage_result_file_name,
499
512
  identifier.get_unescaped_names(self.input_cols),
500
513
  identifier.get_unescaped_names(self.label_cols),
501
514
  identifier.get_unescaped_names(self.sample_weight_col),
502
- statement_params=statement_params,
515
+ statement_params,
503
516
  )
504
517
 
505
518
  if "|" in sproc_export_file_name:
@@ -509,7 +522,7 @@ class LinearSVC(BaseTransformer):
509
522
  print("\n".join(fields[1:]))
510
523
 
511
524
  session.file.get(
512
- os.path.join(stage_result_file_name, sproc_export_file_name),
525
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
513
526
  local_result_file_name,
514
527
  statement_params=statement_params
515
528
  )
@@ -555,7 +568,7 @@ class LinearSVC(BaseTransformer):
555
568
 
556
569
  # Register vectorized UDF for batch inference
557
570
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
558
- safe_id=self.id, method=inference_method)
571
+ safe_id=self._get_rand_id(), method=inference_method)
559
572
 
560
573
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
561
574
  # will try to pickle all of self which fails.
@@ -647,7 +660,7 @@ class LinearSVC(BaseTransformer):
647
660
  return transformed_pandas_df.to_dict("records")
648
661
 
649
662
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
650
- safe_id=self.id
663
+ safe_id=self._get_rand_id()
651
664
  )
652
665
 
653
666
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -814,11 +827,18 @@ class LinearSVC(BaseTransformer):
814
827
  Transformed dataset.
815
828
  """
816
829
  if isinstance(dataset, DataFrame):
830
+ expected_type_inferred = ""
831
+ # when it is classifier, infer the datatype from label columns
832
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
833
+ expected_type_inferred = convert_sp_to_sf_type(
834
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
835
+ )
836
+
817
837
  output_df = self._batch_inference(
818
838
  dataset=dataset,
819
839
  inference_method="predict",
820
840
  expected_output_cols_list=self.output_cols,
821
- expected_output_cols_type="",
841
+ expected_output_cols_type=expected_type_inferred,
822
842
  )
823
843
  elif isinstance(dataset, pd.DataFrame):
824
844
  output_df = self._sklearn_inference(
@@ -889,10 +909,10 @@ class LinearSVC(BaseTransformer):
889
909
 
890
910
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
891
911
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
892
- Returns an empty list if current object is not a classifier or not yet fitted.
912
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
893
913
  """
894
914
  if getattr(self._sklearn_object, "classes_", None) is None:
895
- return []
915
+ return [output_cols_prefix]
896
916
 
897
917
  classes = self._sklearn_object.classes_
898
918
  if isinstance(classes, numpy.ndarray):
@@ -1119,7 +1139,7 @@ class LinearSVC(BaseTransformer):
1119
1139
  cp.dump(self._sklearn_object, local_score_file)
1120
1140
 
1121
1141
  # Create temp stage to run score.
1122
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1142
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1123
1143
  session = dataset._session
1124
1144
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1125
1145
  SqlResultValidator(
@@ -1133,8 +1153,9 @@ class LinearSVC(BaseTransformer):
1133
1153
  expected_value=f"Stage area {score_stage_name} successfully created."
1134
1154
  ).validate()
1135
1155
 
1136
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1137
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1156
+ # Use posixpath to construct stage paths
1157
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1158
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1138
1159
  statement_params = telemetry.get_function_usage_statement_params(
1139
1160
  project=_PROJECT,
1140
1161
  subproject=_SUBPROJECT,
@@ -1160,6 +1181,7 @@ class LinearSVC(BaseTransformer):
1160
1181
  replace=True,
1161
1182
  session=session,
1162
1183
  statement_params=statement_params,
1184
+ anonymous=True
1163
1185
  )
1164
1186
  def score_wrapper_sproc(
1165
1187
  session: Session,
@@ -1167,7 +1189,8 @@ class LinearSVC(BaseTransformer):
1167
1189
  stage_score_file_name: str,
1168
1190
  input_cols: List[str],
1169
1191
  label_cols: List[str],
1170
- sample_weight_col: Optional[str]
1192
+ sample_weight_col: Optional[str],
1193
+ statement_params: Dict[str, str]
1171
1194
  ) -> float:
1172
1195
  import cloudpickle as cp
1173
1196
  import numpy as np
@@ -1217,14 +1240,14 @@ class LinearSVC(BaseTransformer):
1217
1240
  api_calls=[Session.call],
1218
1241
  custom_tags=dict([("autogen", True)]),
1219
1242
  )
1220
- score = session.call(
1221
- score_sproc_name,
1243
+ score = score_wrapper_sproc(
1244
+ session,
1222
1245
  query,
1223
1246
  stage_score_file_name,
1224
1247
  identifier.get_unescaped_names(self.input_cols),
1225
1248
  identifier.get_unescaped_names(self.label_cols),
1226
1249
  identifier.get_unescaped_names(self.sample_weight_col),
1227
- statement_params=statement_params,
1250
+ statement_params,
1228
1251
  )
1229
1252
 
1230
1253
  cleanup_temp_files([local_score_file_name])
@@ -1242,18 +1265,20 @@ class LinearSVC(BaseTransformer):
1242
1265
  if self._sklearn_object._estimator_type == 'classifier':
1243
1266
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1244
1267
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1245
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1268
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1269
+ ([] if self._drop_input_cols else inputs) + outputs)
1246
1270
  # For regressor, the type of predict is float64
1247
1271
  elif self._sklearn_object._estimator_type == 'regressor':
1248
1272
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1249
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1250
-
1273
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1274
+ ([] if self._drop_input_cols else inputs) + outputs)
1251
1275
  for prob_func in PROB_FUNCTIONS:
1252
1276
  if hasattr(self, prob_func):
1253
1277
  output_cols_prefix: str = f"{prob_func}_"
1254
1278
  output_column_names = self._get_output_column_names(output_cols_prefix)
1255
1279
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1256
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1280
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1281
+ ([] if self._drop_input_cols else inputs) + outputs)
1257
1282
 
1258
1283
  @property
1259
1284
  def model_signatures(self) -> Dict[str, ModelSignature]: