snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -274,7 +276,6 @@ class PassiveAggressiveClassifier(BaseTransformer):
274
276
  sample_weight_col: Optional[str] = None,
275
277
  ) -> None:
276
278
  super().__init__()
277
- self.id = str(uuid4()).replace("-", "_").upper()
278
279
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
279
280
 
280
281
  self._deps = list(deps)
@@ -308,6 +309,15 @@ class PassiveAggressiveClassifier(BaseTransformer):
308
309
  self.set_drop_input_cols(drop_input_cols)
309
310
  self.set_sample_weight_col(sample_weight_col)
310
311
 
312
+ def _get_rand_id(self) -> str:
313
+ """
314
+ Generate random id to be used in sproc and stage names.
315
+
316
+ Returns:
317
+ Random id string usable in sproc, table, and stage names.
318
+ """
319
+ return str(uuid4()).replace("-", "_").upper()
320
+
311
321
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
312
322
  """
313
323
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -386,7 +396,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
386
396
  cp.dump(self._sklearn_object, local_transform_file)
387
397
 
388
398
  # Create temp stage to run fit.
389
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
399
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
390
400
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
391
401
  SqlResultValidator(
392
402
  session=session,
@@ -399,11 +409,12 @@ class PassiveAggressiveClassifier(BaseTransformer):
399
409
  expected_value=f"Stage area {transform_stage_name} successfully created."
400
410
  ).validate()
401
411
 
402
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
412
+ # Use posixpath to construct stage paths
413
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
414
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
403
415
  local_result_file_name = get_temp_file_path()
404
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
405
416
 
406
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
417
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
407
418
  statement_params = telemetry.get_function_usage_statement_params(
408
419
  project=_PROJECT,
409
420
  subproject=_SUBPROJECT,
@@ -429,6 +440,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
429
440
  replace=True,
430
441
  session=session,
431
442
  statement_params=statement_params,
443
+ anonymous=True
432
444
  )
433
445
  def fit_wrapper_sproc(
434
446
  session: Session,
@@ -437,7 +449,8 @@ class PassiveAggressiveClassifier(BaseTransformer):
437
449
  stage_result_file_name: str,
438
450
  input_cols: List[str],
439
451
  label_cols: List[str],
440
- sample_weight_col: Optional[str]
452
+ sample_weight_col: Optional[str],
453
+ statement_params: Dict[str, str]
441
454
  ) -> str:
442
455
  import cloudpickle as cp
443
456
  import numpy as np
@@ -504,15 +517,15 @@ class PassiveAggressiveClassifier(BaseTransformer):
504
517
  api_calls=[Session.call],
505
518
  custom_tags=dict([("autogen", True)]),
506
519
  )
507
- sproc_export_file_name = session.call(
508
- fit_sproc_name,
520
+ sproc_export_file_name = fit_wrapper_sproc(
521
+ session,
509
522
  query,
510
523
  stage_transform_file_name,
511
524
  stage_result_file_name,
512
525
  identifier.get_unescaped_names(self.input_cols),
513
526
  identifier.get_unescaped_names(self.label_cols),
514
527
  identifier.get_unescaped_names(self.sample_weight_col),
515
- statement_params=statement_params,
528
+ statement_params,
516
529
  )
517
530
 
518
531
  if "|" in sproc_export_file_name:
@@ -522,7 +535,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
522
535
  print("\n".join(fields[1:]))
523
536
 
524
537
  session.file.get(
525
- os.path.join(stage_result_file_name, sproc_export_file_name),
538
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
526
539
  local_result_file_name,
527
540
  statement_params=statement_params
528
541
  )
@@ -568,7 +581,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
568
581
 
569
582
  # Register vectorized UDF for batch inference
570
583
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
571
- safe_id=self.id, method=inference_method)
584
+ safe_id=self._get_rand_id(), method=inference_method)
572
585
 
573
586
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
574
587
  # will try to pickle all of self which fails.
@@ -660,7 +673,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
660
673
  return transformed_pandas_df.to_dict("records")
661
674
 
662
675
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
663
- safe_id=self.id
676
+ safe_id=self._get_rand_id()
664
677
  )
665
678
 
666
679
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -827,11 +840,18 @@ class PassiveAggressiveClassifier(BaseTransformer):
827
840
  Transformed dataset.
828
841
  """
829
842
  if isinstance(dataset, DataFrame):
843
+ expected_type_inferred = ""
844
+ # when it is classifier, infer the datatype from label columns
845
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
846
+ expected_type_inferred = convert_sp_to_sf_type(
847
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
848
+ )
849
+
830
850
  output_df = self._batch_inference(
831
851
  dataset=dataset,
832
852
  inference_method="predict",
833
853
  expected_output_cols_list=self.output_cols,
834
- expected_output_cols_type="",
854
+ expected_output_cols_type=expected_type_inferred,
835
855
  )
836
856
  elif isinstance(dataset, pd.DataFrame):
837
857
  output_df = self._sklearn_inference(
@@ -902,10 +922,10 @@ class PassiveAggressiveClassifier(BaseTransformer):
902
922
 
903
923
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
904
924
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
905
- Returns an empty list if current object is not a classifier or not yet fitted.
925
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
906
926
  """
907
927
  if getattr(self._sklearn_object, "classes_", None) is None:
908
- return []
928
+ return [output_cols_prefix]
909
929
 
910
930
  classes = self._sklearn_object.classes_
911
931
  if isinstance(classes, numpy.ndarray):
@@ -1132,7 +1152,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
1132
1152
  cp.dump(self._sklearn_object, local_score_file)
1133
1153
 
1134
1154
  # Create temp stage to run score.
1135
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1155
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1136
1156
  session = dataset._session
1137
1157
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1138
1158
  SqlResultValidator(
@@ -1146,8 +1166,9 @@ class PassiveAggressiveClassifier(BaseTransformer):
1146
1166
  expected_value=f"Stage area {score_stage_name} successfully created."
1147
1167
  ).validate()
1148
1168
 
1149
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1150
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1169
+ # Use posixpath to construct stage paths
1170
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1171
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1151
1172
  statement_params = telemetry.get_function_usage_statement_params(
1152
1173
  project=_PROJECT,
1153
1174
  subproject=_SUBPROJECT,
@@ -1173,6 +1194,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
1173
1194
  replace=True,
1174
1195
  session=session,
1175
1196
  statement_params=statement_params,
1197
+ anonymous=True
1176
1198
  )
1177
1199
  def score_wrapper_sproc(
1178
1200
  session: Session,
@@ -1180,7 +1202,8 @@ class PassiveAggressiveClassifier(BaseTransformer):
1180
1202
  stage_score_file_name: str,
1181
1203
  input_cols: List[str],
1182
1204
  label_cols: List[str],
1183
- sample_weight_col: Optional[str]
1205
+ sample_weight_col: Optional[str],
1206
+ statement_params: Dict[str, str]
1184
1207
  ) -> float:
1185
1208
  import cloudpickle as cp
1186
1209
  import numpy as np
@@ -1230,14 +1253,14 @@ class PassiveAggressiveClassifier(BaseTransformer):
1230
1253
  api_calls=[Session.call],
1231
1254
  custom_tags=dict([("autogen", True)]),
1232
1255
  )
1233
- score = session.call(
1234
- score_sproc_name,
1256
+ score = score_wrapper_sproc(
1257
+ session,
1235
1258
  query,
1236
1259
  stage_score_file_name,
1237
1260
  identifier.get_unescaped_names(self.input_cols),
1238
1261
  identifier.get_unescaped_names(self.label_cols),
1239
1262
  identifier.get_unescaped_names(self.sample_weight_col),
1240
- statement_params=statement_params,
1263
+ statement_params,
1241
1264
  )
1242
1265
 
1243
1266
  cleanup_temp_files([local_score_file_name])
@@ -1255,18 +1278,20 @@ class PassiveAggressiveClassifier(BaseTransformer):
1255
1278
  if self._sklearn_object._estimator_type == 'classifier':
1256
1279
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1257
1280
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1258
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1281
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1282
+ ([] if self._drop_input_cols else inputs) + outputs)
1259
1283
  # For regressor, the type of predict is float64
1260
1284
  elif self._sklearn_object._estimator_type == 'regressor':
1261
1285
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1262
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1263
-
1286
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1287
+ ([] if self._drop_input_cols else inputs) + outputs)
1264
1288
  for prob_func in PROB_FUNCTIONS:
1265
1289
  if hasattr(self, prob_func):
1266
1290
  output_cols_prefix: str = f"{prob_func}_"
1267
1291
  output_column_names = self._get_output_column_names(output_cols_prefix)
1268
1292
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1269
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1293
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1294
+ ([] if self._drop_input_cols else inputs) + outputs)
1270
1295
 
1271
1296
  @property
1272
1297
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -262,7 +264,6 @@ class PassiveAggressiveRegressor(BaseTransformer):
262
264
  sample_weight_col: Optional[str] = None,
263
265
  ) -> None:
264
266
  super().__init__()
265
- self.id = str(uuid4()).replace("-", "_").upper()
266
267
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
267
268
 
268
269
  self._deps = list(deps)
@@ -295,6 +296,15 @@ class PassiveAggressiveRegressor(BaseTransformer):
295
296
  self.set_drop_input_cols(drop_input_cols)
296
297
  self.set_sample_weight_col(sample_weight_col)
297
298
 
299
+ def _get_rand_id(self) -> str:
300
+ """
301
+ Generate random id to be used in sproc and stage names.
302
+
303
+ Returns:
304
+ Random id string usable in sproc, table, and stage names.
305
+ """
306
+ return str(uuid4()).replace("-", "_").upper()
307
+
298
308
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
299
309
  """
300
310
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -373,7 +383,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
373
383
  cp.dump(self._sklearn_object, local_transform_file)
374
384
 
375
385
  # Create temp stage to run fit.
376
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
386
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
377
387
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
378
388
  SqlResultValidator(
379
389
  session=session,
@@ -386,11 +396,12 @@ class PassiveAggressiveRegressor(BaseTransformer):
386
396
  expected_value=f"Stage area {transform_stage_name} successfully created."
387
397
  ).validate()
388
398
 
389
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
399
+ # Use posixpath to construct stage paths
400
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
401
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
390
402
  local_result_file_name = get_temp_file_path()
391
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
392
403
 
393
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
404
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
394
405
  statement_params = telemetry.get_function_usage_statement_params(
395
406
  project=_PROJECT,
396
407
  subproject=_SUBPROJECT,
@@ -416,6 +427,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
416
427
  replace=True,
417
428
  session=session,
418
429
  statement_params=statement_params,
430
+ anonymous=True
419
431
  )
420
432
  def fit_wrapper_sproc(
421
433
  session: Session,
@@ -424,7 +436,8 @@ class PassiveAggressiveRegressor(BaseTransformer):
424
436
  stage_result_file_name: str,
425
437
  input_cols: List[str],
426
438
  label_cols: List[str],
427
- sample_weight_col: Optional[str]
439
+ sample_weight_col: Optional[str],
440
+ statement_params: Dict[str, str]
428
441
  ) -> str:
429
442
  import cloudpickle as cp
430
443
  import numpy as np
@@ -491,15 +504,15 @@ class PassiveAggressiveRegressor(BaseTransformer):
491
504
  api_calls=[Session.call],
492
505
  custom_tags=dict([("autogen", True)]),
493
506
  )
494
- sproc_export_file_name = session.call(
495
- fit_sproc_name,
507
+ sproc_export_file_name = fit_wrapper_sproc(
508
+ session,
496
509
  query,
497
510
  stage_transform_file_name,
498
511
  stage_result_file_name,
499
512
  identifier.get_unescaped_names(self.input_cols),
500
513
  identifier.get_unescaped_names(self.label_cols),
501
514
  identifier.get_unescaped_names(self.sample_weight_col),
502
- statement_params=statement_params,
515
+ statement_params,
503
516
  )
504
517
 
505
518
  if "|" in sproc_export_file_name:
@@ -509,7 +522,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
509
522
  print("\n".join(fields[1:]))
510
523
 
511
524
  session.file.get(
512
- os.path.join(stage_result_file_name, sproc_export_file_name),
525
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
513
526
  local_result_file_name,
514
527
  statement_params=statement_params
515
528
  )
@@ -555,7 +568,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
555
568
 
556
569
  # Register vectorized UDF for batch inference
557
570
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
558
- safe_id=self.id, method=inference_method)
571
+ safe_id=self._get_rand_id(), method=inference_method)
559
572
 
560
573
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
561
574
  # will try to pickle all of self which fails.
@@ -647,7 +660,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
647
660
  return transformed_pandas_df.to_dict("records")
648
661
 
649
662
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
650
- safe_id=self.id
663
+ safe_id=self._get_rand_id()
651
664
  )
652
665
 
653
666
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -814,11 +827,18 @@ class PassiveAggressiveRegressor(BaseTransformer):
814
827
  Transformed dataset.
815
828
  """
816
829
  if isinstance(dataset, DataFrame):
830
+ expected_type_inferred = "float"
831
+ # when it is classifier, infer the datatype from label columns
832
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
833
+ expected_type_inferred = convert_sp_to_sf_type(
834
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
835
+ )
836
+
817
837
  output_df = self._batch_inference(
818
838
  dataset=dataset,
819
839
  inference_method="predict",
820
840
  expected_output_cols_list=self.output_cols,
821
- expected_output_cols_type="float",
841
+ expected_output_cols_type=expected_type_inferred,
822
842
  )
823
843
  elif isinstance(dataset, pd.DataFrame):
824
844
  output_df = self._sklearn_inference(
@@ -889,10 +909,10 @@ class PassiveAggressiveRegressor(BaseTransformer):
889
909
 
890
910
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
891
911
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
892
- Returns an empty list if current object is not a classifier or not yet fitted.
912
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
893
913
  """
894
914
  if getattr(self._sklearn_object, "classes_", None) is None:
895
- return []
915
+ return [output_cols_prefix]
896
916
 
897
917
  classes = self._sklearn_object.classes_
898
918
  if isinstance(classes, numpy.ndarray):
@@ -1117,7 +1137,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
1117
1137
  cp.dump(self._sklearn_object, local_score_file)
1118
1138
 
1119
1139
  # Create temp stage to run score.
1120
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1140
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1121
1141
  session = dataset._session
1122
1142
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1123
1143
  SqlResultValidator(
@@ -1131,8 +1151,9 @@ class PassiveAggressiveRegressor(BaseTransformer):
1131
1151
  expected_value=f"Stage area {score_stage_name} successfully created."
1132
1152
  ).validate()
1133
1153
 
1134
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1135
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1154
+ # Use posixpath to construct stage paths
1155
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1156
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1136
1157
  statement_params = telemetry.get_function_usage_statement_params(
1137
1158
  project=_PROJECT,
1138
1159
  subproject=_SUBPROJECT,
@@ -1158,6 +1179,7 @@ class PassiveAggressiveRegressor(BaseTransformer):
1158
1179
  replace=True,
1159
1180
  session=session,
1160
1181
  statement_params=statement_params,
1182
+ anonymous=True
1161
1183
  )
1162
1184
  def score_wrapper_sproc(
1163
1185
  session: Session,
@@ -1165,7 +1187,8 @@ class PassiveAggressiveRegressor(BaseTransformer):
1165
1187
  stage_score_file_name: str,
1166
1188
  input_cols: List[str],
1167
1189
  label_cols: List[str],
1168
- sample_weight_col: Optional[str]
1190
+ sample_weight_col: Optional[str],
1191
+ statement_params: Dict[str, str]
1169
1192
  ) -> float:
1170
1193
  import cloudpickle as cp
1171
1194
  import numpy as np
@@ -1215,14 +1238,14 @@ class PassiveAggressiveRegressor(BaseTransformer):
1215
1238
  api_calls=[Session.call],
1216
1239
  custom_tags=dict([("autogen", True)]),
1217
1240
  )
1218
- score = session.call(
1219
- score_sproc_name,
1241
+ score = score_wrapper_sproc(
1242
+ session,
1220
1243
  query,
1221
1244
  stage_score_file_name,
1222
1245
  identifier.get_unescaped_names(self.input_cols),
1223
1246
  identifier.get_unescaped_names(self.label_cols),
1224
1247
  identifier.get_unescaped_names(self.sample_weight_col),
1225
- statement_params=statement_params,
1248
+ statement_params,
1226
1249
  )
1227
1250
 
1228
1251
  cleanup_temp_files([local_score_file_name])
@@ -1240,18 +1263,20 @@ class PassiveAggressiveRegressor(BaseTransformer):
1240
1263
  if self._sklearn_object._estimator_type == 'classifier':
1241
1264
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1242
1265
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1243
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1266
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1267
+ ([] if self._drop_input_cols else inputs) + outputs)
1244
1268
  # For regressor, the type of predict is float64
1245
1269
  elif self._sklearn_object._estimator_type == 'regressor':
1246
1270
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1247
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1248
-
1271
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1272
+ ([] if self._drop_input_cols else inputs) + outputs)
1249
1273
  for prob_func in PROB_FUNCTIONS:
1250
1274
  if hasattr(self, prob_func):
1251
1275
  output_cols_prefix: str = f"{prob_func}_"
1252
1276
  output_column_names = self._get_output_column_names(output_cols_prefix)
1253
1277
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1254
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1278
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1279
+ ([] if self._drop_input_cols else inputs) + outputs)
1255
1280
 
1256
1281
  @property
1257
1282
  def model_signatures(self) -> Dict[str, ModelSignature]: