snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -273,7 +275,6 @@ class Perceptron(BaseTransformer):
273
275
  sample_weight_col: Optional[str] = None,
274
276
  ) -> None:
275
277
  super().__init__()
276
- self.id = str(uuid4()).replace("-", "_").upper()
277
278
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
278
279
 
279
280
  self._deps = list(deps)
@@ -308,6 +309,15 @@ class Perceptron(BaseTransformer):
308
309
  self.set_drop_input_cols(drop_input_cols)
309
310
  self.set_sample_weight_col(sample_weight_col)
310
311
 
312
+ def _get_rand_id(self) -> str:
313
+ """
314
+ Generate random id to be used in sproc and stage names.
315
+
316
+ Returns:
317
+ Random id string usable in sproc, table, and stage names.
318
+ """
319
+ return str(uuid4()).replace("-", "_").upper()
320
+
311
321
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
312
322
  """
313
323
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -386,7 +396,7 @@ class Perceptron(BaseTransformer):
386
396
  cp.dump(self._sklearn_object, local_transform_file)
387
397
 
388
398
  # Create temp stage to run fit.
389
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
399
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
390
400
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
391
401
  SqlResultValidator(
392
402
  session=session,
@@ -399,11 +409,12 @@ class Perceptron(BaseTransformer):
399
409
  expected_value=f"Stage area {transform_stage_name} successfully created."
400
410
  ).validate()
401
411
 
402
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
412
+ # Use posixpath to construct stage paths
413
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
414
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
403
415
  local_result_file_name = get_temp_file_path()
404
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
405
416
 
406
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
417
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
407
418
  statement_params = telemetry.get_function_usage_statement_params(
408
419
  project=_PROJECT,
409
420
  subproject=_SUBPROJECT,
@@ -429,6 +440,7 @@ class Perceptron(BaseTransformer):
429
440
  replace=True,
430
441
  session=session,
431
442
  statement_params=statement_params,
443
+ anonymous=True
432
444
  )
433
445
  def fit_wrapper_sproc(
434
446
  session: Session,
@@ -437,7 +449,8 @@ class Perceptron(BaseTransformer):
437
449
  stage_result_file_name: str,
438
450
  input_cols: List[str],
439
451
  label_cols: List[str],
440
- sample_weight_col: Optional[str]
452
+ sample_weight_col: Optional[str],
453
+ statement_params: Dict[str, str]
441
454
  ) -> str:
442
455
  import cloudpickle as cp
443
456
  import numpy as np
@@ -504,15 +517,15 @@ class Perceptron(BaseTransformer):
504
517
  api_calls=[Session.call],
505
518
  custom_tags=dict([("autogen", True)]),
506
519
  )
507
- sproc_export_file_name = session.call(
508
- fit_sproc_name,
520
+ sproc_export_file_name = fit_wrapper_sproc(
521
+ session,
509
522
  query,
510
523
  stage_transform_file_name,
511
524
  stage_result_file_name,
512
525
  identifier.get_unescaped_names(self.input_cols),
513
526
  identifier.get_unescaped_names(self.label_cols),
514
527
  identifier.get_unescaped_names(self.sample_weight_col),
515
- statement_params=statement_params,
528
+ statement_params,
516
529
  )
517
530
 
518
531
  if "|" in sproc_export_file_name:
@@ -522,7 +535,7 @@ class Perceptron(BaseTransformer):
522
535
  print("\n".join(fields[1:]))
523
536
 
524
537
  session.file.get(
525
- os.path.join(stage_result_file_name, sproc_export_file_name),
538
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
526
539
  local_result_file_name,
527
540
  statement_params=statement_params
528
541
  )
@@ -568,7 +581,7 @@ class Perceptron(BaseTransformer):
568
581
 
569
582
  # Register vectorized UDF for batch inference
570
583
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
571
- safe_id=self.id, method=inference_method)
584
+ safe_id=self._get_rand_id(), method=inference_method)
572
585
 
573
586
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
574
587
  # will try to pickle all of self which fails.
@@ -660,7 +673,7 @@ class Perceptron(BaseTransformer):
660
673
  return transformed_pandas_df.to_dict("records")
661
674
 
662
675
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
663
- safe_id=self.id
676
+ safe_id=self._get_rand_id()
664
677
  )
665
678
 
666
679
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -827,11 +840,18 @@ class Perceptron(BaseTransformer):
827
840
  Transformed dataset.
828
841
  """
829
842
  if isinstance(dataset, DataFrame):
843
+ expected_type_inferred = ""
844
+ # when it is classifier, infer the datatype from label columns
845
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
846
+ expected_type_inferred = convert_sp_to_sf_type(
847
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
848
+ )
849
+
830
850
  output_df = self._batch_inference(
831
851
  dataset=dataset,
832
852
  inference_method="predict",
833
853
  expected_output_cols_list=self.output_cols,
834
- expected_output_cols_type="",
854
+ expected_output_cols_type=expected_type_inferred,
835
855
  )
836
856
  elif isinstance(dataset, pd.DataFrame):
837
857
  output_df = self._sklearn_inference(
@@ -902,10 +922,10 @@ class Perceptron(BaseTransformer):
902
922
 
903
923
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
904
924
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
905
- Returns an empty list if current object is not a classifier or not yet fitted.
925
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
906
926
  """
907
927
  if getattr(self._sklearn_object, "classes_", None) is None:
908
- return []
928
+ return [output_cols_prefix]
909
929
 
910
930
  classes = self._sklearn_object.classes_
911
931
  if isinstance(classes, numpy.ndarray):
@@ -1132,7 +1152,7 @@ class Perceptron(BaseTransformer):
1132
1152
  cp.dump(self._sklearn_object, local_score_file)
1133
1153
 
1134
1154
  # Create temp stage to run score.
1135
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1155
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1136
1156
  session = dataset._session
1137
1157
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1138
1158
  SqlResultValidator(
@@ -1146,8 +1166,9 @@ class Perceptron(BaseTransformer):
1146
1166
  expected_value=f"Stage area {score_stage_name} successfully created."
1147
1167
  ).validate()
1148
1168
 
1149
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1150
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1169
+ # Use posixpath to construct stage paths
1170
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1171
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1151
1172
  statement_params = telemetry.get_function_usage_statement_params(
1152
1173
  project=_PROJECT,
1153
1174
  subproject=_SUBPROJECT,
@@ -1173,6 +1194,7 @@ class Perceptron(BaseTransformer):
1173
1194
  replace=True,
1174
1195
  session=session,
1175
1196
  statement_params=statement_params,
1197
+ anonymous=True
1176
1198
  )
1177
1199
  def score_wrapper_sproc(
1178
1200
  session: Session,
@@ -1180,7 +1202,8 @@ class Perceptron(BaseTransformer):
1180
1202
  stage_score_file_name: str,
1181
1203
  input_cols: List[str],
1182
1204
  label_cols: List[str],
1183
- sample_weight_col: Optional[str]
1205
+ sample_weight_col: Optional[str],
1206
+ statement_params: Dict[str, str]
1184
1207
  ) -> float:
1185
1208
  import cloudpickle as cp
1186
1209
  import numpy as np
@@ -1230,14 +1253,14 @@ class Perceptron(BaseTransformer):
1230
1253
  api_calls=[Session.call],
1231
1254
  custom_tags=dict([("autogen", True)]),
1232
1255
  )
1233
- score = session.call(
1234
- score_sproc_name,
1256
+ score = score_wrapper_sproc(
1257
+ session,
1235
1258
  query,
1236
1259
  stage_score_file_name,
1237
1260
  identifier.get_unescaped_names(self.input_cols),
1238
1261
  identifier.get_unescaped_names(self.label_cols),
1239
1262
  identifier.get_unescaped_names(self.sample_weight_col),
1240
- statement_params=statement_params,
1263
+ statement_params,
1241
1264
  )
1242
1265
 
1243
1266
  cleanup_temp_files([local_score_file_name])
@@ -1255,18 +1278,20 @@ class Perceptron(BaseTransformer):
1255
1278
  if self._sklearn_object._estimator_type == 'classifier':
1256
1279
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1257
1280
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1258
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1281
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1282
+ ([] if self._drop_input_cols else inputs) + outputs)
1259
1283
  # For regressor, the type of predict is float64
1260
1284
  elif self._sklearn_object._estimator_type == 'regressor':
1261
1285
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1262
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1263
-
1286
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1287
+ ([] if self._drop_input_cols else inputs) + outputs)
1264
1288
  for prob_func in PROB_FUNCTIONS:
1265
1289
  if hasattr(self, prob_func):
1266
1290
  output_cols_prefix: str = f"{prob_func}_"
1267
1291
  output_column_names = self._get_output_column_names(output_cols_prefix)
1268
1292
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1269
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1293
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1294
+ ([] if self._drop_input_cols else inputs) + outputs)
1270
1295
 
1271
1296
  @property
1272
1297
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -230,7 +232,6 @@ class PoissonRegressor(BaseTransformer):
230
232
  sample_weight_col: Optional[str] = None,
231
233
  ) -> None:
232
234
  super().__init__()
233
- self.id = str(uuid4()).replace("-", "_").upper()
234
235
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
235
236
 
236
237
  self._deps = list(deps)
@@ -256,6 +257,15 @@ class PoissonRegressor(BaseTransformer):
256
257
  self.set_drop_input_cols(drop_input_cols)
257
258
  self.set_sample_weight_col(sample_weight_col)
258
259
 
260
+ def _get_rand_id(self) -> str:
261
+ """
262
+ Generate random id to be used in sproc and stage names.
263
+
264
+ Returns:
265
+ Random id string usable in sproc, table, and stage names.
266
+ """
267
+ return str(uuid4()).replace("-", "_").upper()
268
+
259
269
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
260
270
  """
261
271
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -334,7 +344,7 @@ class PoissonRegressor(BaseTransformer):
334
344
  cp.dump(self._sklearn_object, local_transform_file)
335
345
 
336
346
  # Create temp stage to run fit.
337
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
347
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
338
348
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
339
349
  SqlResultValidator(
340
350
  session=session,
@@ -347,11 +357,12 @@ class PoissonRegressor(BaseTransformer):
347
357
  expected_value=f"Stage area {transform_stage_name} successfully created."
348
358
  ).validate()
349
359
 
350
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
360
+ # Use posixpath to construct stage paths
361
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
362
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
351
363
  local_result_file_name = get_temp_file_path()
352
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
353
364
 
354
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
365
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
355
366
  statement_params = telemetry.get_function_usage_statement_params(
356
367
  project=_PROJECT,
357
368
  subproject=_SUBPROJECT,
@@ -377,6 +388,7 @@ class PoissonRegressor(BaseTransformer):
377
388
  replace=True,
378
389
  session=session,
379
390
  statement_params=statement_params,
391
+ anonymous=True
380
392
  )
381
393
  def fit_wrapper_sproc(
382
394
  session: Session,
@@ -385,7 +397,8 @@ class PoissonRegressor(BaseTransformer):
385
397
  stage_result_file_name: str,
386
398
  input_cols: List[str],
387
399
  label_cols: List[str],
388
- sample_weight_col: Optional[str]
400
+ sample_weight_col: Optional[str],
401
+ statement_params: Dict[str, str]
389
402
  ) -> str:
390
403
  import cloudpickle as cp
391
404
  import numpy as np
@@ -452,15 +465,15 @@ class PoissonRegressor(BaseTransformer):
452
465
  api_calls=[Session.call],
453
466
  custom_tags=dict([("autogen", True)]),
454
467
  )
455
- sproc_export_file_name = session.call(
456
- fit_sproc_name,
468
+ sproc_export_file_name = fit_wrapper_sproc(
469
+ session,
457
470
  query,
458
471
  stage_transform_file_name,
459
472
  stage_result_file_name,
460
473
  identifier.get_unescaped_names(self.input_cols),
461
474
  identifier.get_unescaped_names(self.label_cols),
462
475
  identifier.get_unescaped_names(self.sample_weight_col),
463
- statement_params=statement_params,
476
+ statement_params,
464
477
  )
465
478
 
466
479
  if "|" in sproc_export_file_name:
@@ -470,7 +483,7 @@ class PoissonRegressor(BaseTransformer):
470
483
  print("\n".join(fields[1:]))
471
484
 
472
485
  session.file.get(
473
- os.path.join(stage_result_file_name, sproc_export_file_name),
486
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
474
487
  local_result_file_name,
475
488
  statement_params=statement_params
476
489
  )
@@ -516,7 +529,7 @@ class PoissonRegressor(BaseTransformer):
516
529
 
517
530
  # Register vectorized UDF for batch inference
518
531
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
519
- safe_id=self.id, method=inference_method)
532
+ safe_id=self._get_rand_id(), method=inference_method)
520
533
 
521
534
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
522
535
  # will try to pickle all of self which fails.
@@ -608,7 +621,7 @@ class PoissonRegressor(BaseTransformer):
608
621
  return transformed_pandas_df.to_dict("records")
609
622
 
610
623
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
611
- safe_id=self.id
624
+ safe_id=self._get_rand_id()
612
625
  )
613
626
 
614
627
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -775,11 +788,18 @@ class PoissonRegressor(BaseTransformer):
775
788
  Transformed dataset.
776
789
  """
777
790
  if isinstance(dataset, DataFrame):
791
+ expected_type_inferred = "float"
792
+ # when it is classifier, infer the datatype from label columns
793
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
794
+ expected_type_inferred = convert_sp_to_sf_type(
795
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
796
+ )
797
+
778
798
  output_df = self._batch_inference(
779
799
  dataset=dataset,
780
800
  inference_method="predict",
781
801
  expected_output_cols_list=self.output_cols,
782
- expected_output_cols_type="float",
802
+ expected_output_cols_type=expected_type_inferred,
783
803
  )
784
804
  elif isinstance(dataset, pd.DataFrame):
785
805
  output_df = self._sklearn_inference(
@@ -850,10 +870,10 @@ class PoissonRegressor(BaseTransformer):
850
870
 
851
871
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
852
872
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
853
- Returns an empty list if current object is not a classifier or not yet fitted.
873
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
854
874
  """
855
875
  if getattr(self._sklearn_object, "classes_", None) is None:
856
- return []
876
+ return [output_cols_prefix]
857
877
 
858
878
  classes = self._sklearn_object.classes_
859
879
  if isinstance(classes, numpy.ndarray):
@@ -1078,7 +1098,7 @@ class PoissonRegressor(BaseTransformer):
1078
1098
  cp.dump(self._sklearn_object, local_score_file)
1079
1099
 
1080
1100
  # Create temp stage to run score.
1081
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1101
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1082
1102
  session = dataset._session
1083
1103
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1084
1104
  SqlResultValidator(
@@ -1092,8 +1112,9 @@ class PoissonRegressor(BaseTransformer):
1092
1112
  expected_value=f"Stage area {score_stage_name} successfully created."
1093
1113
  ).validate()
1094
1114
 
1095
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1096
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1115
+ # Use posixpath to construct stage paths
1116
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1117
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1097
1118
  statement_params = telemetry.get_function_usage_statement_params(
1098
1119
  project=_PROJECT,
1099
1120
  subproject=_SUBPROJECT,
@@ -1119,6 +1140,7 @@ class PoissonRegressor(BaseTransformer):
1119
1140
  replace=True,
1120
1141
  session=session,
1121
1142
  statement_params=statement_params,
1143
+ anonymous=True
1122
1144
  )
1123
1145
  def score_wrapper_sproc(
1124
1146
  session: Session,
@@ -1126,7 +1148,8 @@ class PoissonRegressor(BaseTransformer):
1126
1148
  stage_score_file_name: str,
1127
1149
  input_cols: List[str],
1128
1150
  label_cols: List[str],
1129
- sample_weight_col: Optional[str]
1151
+ sample_weight_col: Optional[str],
1152
+ statement_params: Dict[str, str]
1130
1153
  ) -> float:
1131
1154
  import cloudpickle as cp
1132
1155
  import numpy as np
@@ -1176,14 +1199,14 @@ class PoissonRegressor(BaseTransformer):
1176
1199
  api_calls=[Session.call],
1177
1200
  custom_tags=dict([("autogen", True)]),
1178
1201
  )
1179
- score = session.call(
1180
- score_sproc_name,
1202
+ score = score_wrapper_sproc(
1203
+ session,
1181
1204
  query,
1182
1205
  stage_score_file_name,
1183
1206
  identifier.get_unescaped_names(self.input_cols),
1184
1207
  identifier.get_unescaped_names(self.label_cols),
1185
1208
  identifier.get_unescaped_names(self.sample_weight_col),
1186
- statement_params=statement_params,
1209
+ statement_params,
1187
1210
  )
1188
1211
 
1189
1212
  cleanup_temp_files([local_score_file_name])
@@ -1201,18 +1224,20 @@ class PoissonRegressor(BaseTransformer):
1201
1224
  if self._sklearn_object._estimator_type == 'classifier':
1202
1225
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1203
1226
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1204
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1227
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1228
+ ([] if self._drop_input_cols else inputs) + outputs)
1205
1229
  # For regressor, the type of predict is float64
1206
1230
  elif self._sklearn_object._estimator_type == 'regressor':
1207
1231
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1208
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1209
-
1232
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1233
+ ([] if self._drop_input_cols else inputs) + outputs)
1210
1234
  for prob_func in PROB_FUNCTIONS:
1211
1235
  if hasattr(self, prob_func):
1212
1236
  output_cols_prefix: str = f"{prob_func}_"
1213
1237
  output_column_names = self._get_output_column_names(output_cols_prefix)
1214
1238
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1215
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1239
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1240
+ ([] if self._drop_input_cols else inputs) + outputs)
1216
1241
 
1217
1242
  @property
1218
1243
  def model_signatures(self) -> Dict[str, ModelSignature]: