snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -9,6 +9,8 @@ if TYPE_CHECKING:
9
9
  import pandas as pd
10
10
  import sklearn.base
11
11
  import sklearn.pipeline
12
+ import tensorflow
13
+ import torch
12
14
  import xgboost
13
15
 
14
16
  import snowflake.ml.model.custom_model
@@ -34,9 +36,10 @@ _SupportedNumpyDtype = Union[
34
36
  ]
35
37
  _SupportedNumpyArray = npt.NDArray[_SupportedNumpyDtype]
36
38
  _SupportedBuiltinsList = Sequence[_SupportedBuiltins]
39
+ _SupportedArrayLike = Union[_SupportedNumpyArray, "torch.Tensor", "tensorflow.Tensor", "tensorflow.Variable"]
37
40
 
38
41
  SupportedLocalDataType = Union[
39
- "pd.DataFrame", _SupportedNumpyArray, Sequence[_SupportedNumpyArray], _SupportedBuiltinsList
42
+ "pd.DataFrame", _SupportedNumpyArray, Sequence[_SupportedArrayLike], _SupportedBuiltinsList
40
43
  ]
41
44
 
42
45
  SupportedDataType = Union[SupportedLocalDataType, "snowflake.snowpark.DataFrame"]
@@ -51,6 +54,8 @@ SupportedLocalModelType = Union[
51
54
  "sklearn.pipeline.Pipeline",
52
55
  "xgboost.XGBModel",
53
56
  "xgboost.Booster",
57
+ "torch.nn.Module",
58
+ "torch.jit.ScriptModule", # type:ignore[name-defined]
54
59
  ]
55
60
 
56
61
  SupportedSnowMLModelType: TypeAlias = "base.BaseEstimator"
@@ -70,6 +75,8 @@ Here is all acceptable types of Snowflake native model packaging and its handler
70
75
  | xgboost.XGBModel | xgboost.py | _XGBModelHandler |
71
76
  | xgboost.Booster | xgboost.py | _XGBModelHandler |
72
77
  | snowflake.ml.framework.base.BaseEstimator | snowmlmodel.py | _SnowMLModelHandler |
78
+ | torch.nn.Module | pytorch.py | _PyTorchHandler |
79
+ | torch.jit.ScriptModule | torchscript.py | _TorchScriptHandler |
73
80
  """
74
81
 
75
82
 
@@ -79,19 +86,23 @@ _ModelType = TypeVar("_ModelType", bound=SupportedModelType)
79
86
  class DeployOptions(TypedDict):
80
87
  """Common Options for deploying to Snowflake.
81
88
 
82
- output_with_input_features: Whether or not preserve the input columns in the output when predicting.
83
- Defaults to False.
89
+ disable_local_conda_resolver: Set to disable the local conda resolver's environment pre-check and rely on
90
+ the information schema only. Defaults to False.
84
91
  keep_order: Whether or not preserve the row order when predicting. Only available for dataframe has fewer than 2**64
85
92
  rows. Defaults to True.
93
+ output_with_input_features: Whether or not preserve the input columns in the output when predicting.
94
+ Defaults to False.
86
95
  """
87
96
 
88
- output_with_input_features: NotRequired[bool]
97
+ disable_local_conda_resolver: NotRequired[bool]
89
98
  keep_order: NotRequired[bool]
99
+ output_with_input_features: NotRequired[bool]
90
100
 
91
101
 
92
102
  class WarehouseDeployOptions(DeployOptions):
93
103
  """Options for deploying to the Snowflake Warehouse.
94
104
 
105
+
95
106
  permanent_udf_stage_location: A Snowflake stage option where the UDF should be persisted. If specified, the model
96
107
  will be deployed as a permanent UDF, otherwise temporary.
97
108
  relax_version: Whether or not relax the version constraints of the dependencies if unresolvable. Defaults to False.
@@ -130,3 +141,11 @@ class XGBModelSaveOptions(ModelSaveOption):
130
141
 
131
142
  class SNOWModelSaveOptions(ModelSaveOption):
132
143
  target_methods: NotRequired[Sequence[str]]
144
+
145
+
146
+ class PyTorchSaveOptions(ModelSaveOption):
147
+ target_methods: NotRequired[Sequence[str]]
148
+
149
+
150
+ class TorchScriptSaveOptions(ModelSaveOption):
151
+ target_methods: NotRequired[Sequence[str]]
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -247,7 +249,6 @@ class CalibratedClassifierCV(BaseTransformer):
247
249
  sample_weight_col: Optional[str] = None,
248
250
  ) -> None:
249
251
  super().__init__()
250
- self.id = str(uuid4()).replace("-", "_").upper()
251
252
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
252
253
  deps = deps | _gather_dependencies(estimator)
253
254
  deps = deps | _gather_dependencies(base_estimator)
@@ -274,6 +275,15 @@ class CalibratedClassifierCV(BaseTransformer):
274
275
  self.set_drop_input_cols(drop_input_cols)
275
276
  self.set_sample_weight_col(sample_weight_col)
276
277
 
278
+ def _get_rand_id(self) -> str:
279
+ """
280
+ Generate random id to be used in sproc and stage names.
281
+
282
+ Returns:
283
+ Random id string usable in sproc, table, and stage names.
284
+ """
285
+ return str(uuid4()).replace("-", "_").upper()
286
+
277
287
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
278
288
  """
279
289
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -352,7 +362,7 @@ class CalibratedClassifierCV(BaseTransformer):
352
362
  cp.dump(self._sklearn_object, local_transform_file)
353
363
 
354
364
  # Create temp stage to run fit.
355
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
365
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
356
366
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
357
367
  SqlResultValidator(
358
368
  session=session,
@@ -365,11 +375,12 @@ class CalibratedClassifierCV(BaseTransformer):
365
375
  expected_value=f"Stage area {transform_stage_name} successfully created."
366
376
  ).validate()
367
377
 
368
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
378
+ # Use posixpath to construct stage paths
379
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
380
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
369
381
  local_result_file_name = get_temp_file_path()
370
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
371
382
 
372
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
383
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
373
384
  statement_params = telemetry.get_function_usage_statement_params(
374
385
  project=_PROJECT,
375
386
  subproject=_SUBPROJECT,
@@ -395,6 +406,7 @@ class CalibratedClassifierCV(BaseTransformer):
395
406
  replace=True,
396
407
  session=session,
397
408
  statement_params=statement_params,
409
+ anonymous=True
398
410
  )
399
411
  def fit_wrapper_sproc(
400
412
  session: Session,
@@ -403,7 +415,8 @@ class CalibratedClassifierCV(BaseTransformer):
403
415
  stage_result_file_name: str,
404
416
  input_cols: List[str],
405
417
  label_cols: List[str],
406
- sample_weight_col: Optional[str]
418
+ sample_weight_col: Optional[str],
419
+ statement_params: Dict[str, str]
407
420
  ) -> str:
408
421
  import cloudpickle as cp
409
422
  import numpy as np
@@ -470,15 +483,15 @@ class CalibratedClassifierCV(BaseTransformer):
470
483
  api_calls=[Session.call],
471
484
  custom_tags=dict([("autogen", True)]),
472
485
  )
473
- sproc_export_file_name = session.call(
474
- fit_sproc_name,
486
+ sproc_export_file_name = fit_wrapper_sproc(
487
+ session,
475
488
  query,
476
489
  stage_transform_file_name,
477
490
  stage_result_file_name,
478
491
  identifier.get_unescaped_names(self.input_cols),
479
492
  identifier.get_unescaped_names(self.label_cols),
480
493
  identifier.get_unescaped_names(self.sample_weight_col),
481
- statement_params=statement_params,
494
+ statement_params,
482
495
  )
483
496
 
484
497
  if "|" in sproc_export_file_name:
@@ -488,7 +501,7 @@ class CalibratedClassifierCV(BaseTransformer):
488
501
  print("\n".join(fields[1:]))
489
502
 
490
503
  session.file.get(
491
- os.path.join(stage_result_file_name, sproc_export_file_name),
504
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
492
505
  local_result_file_name,
493
506
  statement_params=statement_params
494
507
  )
@@ -534,7 +547,7 @@ class CalibratedClassifierCV(BaseTransformer):
534
547
 
535
548
  # Register vectorized UDF for batch inference
536
549
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
537
- safe_id=self.id, method=inference_method)
550
+ safe_id=self._get_rand_id(), method=inference_method)
538
551
 
539
552
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
540
553
  # will try to pickle all of self which fails.
@@ -626,7 +639,7 @@ class CalibratedClassifierCV(BaseTransformer):
626
639
  return transformed_pandas_df.to_dict("records")
627
640
 
628
641
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
629
- safe_id=self.id
642
+ safe_id=self._get_rand_id()
630
643
  )
631
644
 
632
645
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -793,11 +806,18 @@ class CalibratedClassifierCV(BaseTransformer):
793
806
  Transformed dataset.
794
807
  """
795
808
  if isinstance(dataset, DataFrame):
809
+ expected_type_inferred = ""
810
+ # when it is classifier, infer the datatype from label columns
811
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
812
+ expected_type_inferred = convert_sp_to_sf_type(
813
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
814
+ )
815
+
796
816
  output_df = self._batch_inference(
797
817
  dataset=dataset,
798
818
  inference_method="predict",
799
819
  expected_output_cols_list=self.output_cols,
800
- expected_output_cols_type="",
820
+ expected_output_cols_type=expected_type_inferred,
801
821
  )
802
822
  elif isinstance(dataset, pd.DataFrame):
803
823
  output_df = self._sklearn_inference(
@@ -868,10 +888,10 @@ class CalibratedClassifierCV(BaseTransformer):
868
888
 
869
889
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
870
890
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
871
- Returns an empty list if current object is not a classifier or not yet fitted.
891
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
872
892
  """
873
893
  if getattr(self._sklearn_object, "classes_", None) is None:
874
- return []
894
+ return [output_cols_prefix]
875
895
 
876
896
  classes = self._sklearn_object.classes_
877
897
  if isinstance(classes, numpy.ndarray):
@@ -1100,7 +1120,7 @@ class CalibratedClassifierCV(BaseTransformer):
1100
1120
  cp.dump(self._sklearn_object, local_score_file)
1101
1121
 
1102
1122
  # Create temp stage to run score.
1103
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1123
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1104
1124
  session = dataset._session
1105
1125
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1106
1126
  SqlResultValidator(
@@ -1114,8 +1134,9 @@ class CalibratedClassifierCV(BaseTransformer):
1114
1134
  expected_value=f"Stage area {score_stage_name} successfully created."
1115
1135
  ).validate()
1116
1136
 
1117
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1118
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1137
+ # Use posixpath to construct stage paths
1138
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1139
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1119
1140
  statement_params = telemetry.get_function_usage_statement_params(
1120
1141
  project=_PROJECT,
1121
1142
  subproject=_SUBPROJECT,
@@ -1141,6 +1162,7 @@ class CalibratedClassifierCV(BaseTransformer):
1141
1162
  replace=True,
1142
1163
  session=session,
1143
1164
  statement_params=statement_params,
1165
+ anonymous=True
1144
1166
  )
1145
1167
  def score_wrapper_sproc(
1146
1168
  session: Session,
@@ -1148,7 +1170,8 @@ class CalibratedClassifierCV(BaseTransformer):
1148
1170
  stage_score_file_name: str,
1149
1171
  input_cols: List[str],
1150
1172
  label_cols: List[str],
1151
- sample_weight_col: Optional[str]
1173
+ sample_weight_col: Optional[str],
1174
+ statement_params: Dict[str, str]
1152
1175
  ) -> float:
1153
1176
  import cloudpickle as cp
1154
1177
  import numpy as np
@@ -1198,14 +1221,14 @@ class CalibratedClassifierCV(BaseTransformer):
1198
1221
  api_calls=[Session.call],
1199
1222
  custom_tags=dict([("autogen", True)]),
1200
1223
  )
1201
- score = session.call(
1202
- score_sproc_name,
1224
+ score = score_wrapper_sproc(
1225
+ session,
1203
1226
  query,
1204
1227
  stage_score_file_name,
1205
1228
  identifier.get_unescaped_names(self.input_cols),
1206
1229
  identifier.get_unescaped_names(self.label_cols),
1207
1230
  identifier.get_unescaped_names(self.sample_weight_col),
1208
- statement_params=statement_params,
1231
+ statement_params,
1209
1232
  )
1210
1233
 
1211
1234
  cleanup_temp_files([local_score_file_name])
@@ -1223,18 +1246,20 @@ class CalibratedClassifierCV(BaseTransformer):
1223
1246
  if self._sklearn_object._estimator_type == 'classifier':
1224
1247
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1225
1248
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1226
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1249
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1250
+ ([] if self._drop_input_cols else inputs) + outputs)
1227
1251
  # For regressor, the type of predict is float64
1228
1252
  elif self._sklearn_object._estimator_type == 'regressor':
1229
1253
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1230
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1231
-
1254
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1255
+ ([] if self._drop_input_cols else inputs) + outputs)
1232
1256
  for prob_func in PROB_FUNCTIONS:
1233
1257
  if hasattr(self, prob_func):
1234
1258
  output_cols_prefix: str = f"{prob_func}_"
1235
1259
  output_column_names = self._get_output_column_names(output_cols_prefix)
1236
1260
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1237
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1261
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1262
+ ([] if self._drop_input_cols else inputs) + outputs)
1238
1263
 
1239
1264
  @property
1240
1265
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -224,7 +226,6 @@ class AffinityPropagation(BaseTransformer):
224
226
  sample_weight_col: Optional[str] = None,
225
227
  ) -> None:
226
228
  super().__init__()
227
- self.id = str(uuid4()).replace("-", "_").upper()
228
229
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
229
230
 
230
231
  self._deps = list(deps)
@@ -251,6 +252,15 @@ class AffinityPropagation(BaseTransformer):
251
252
  self.set_drop_input_cols(drop_input_cols)
252
253
  self.set_sample_weight_col(sample_weight_col)
253
254
 
255
+ def _get_rand_id(self) -> str:
256
+ """
257
+ Generate random id to be used in sproc and stage names.
258
+
259
+ Returns:
260
+ Random id string usable in sproc, table, and stage names.
261
+ """
262
+ return str(uuid4()).replace("-", "_").upper()
263
+
254
264
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
255
265
  """
256
266
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -329,7 +339,7 @@ class AffinityPropagation(BaseTransformer):
329
339
  cp.dump(self._sklearn_object, local_transform_file)
330
340
 
331
341
  # Create temp stage to run fit.
332
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
342
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
333
343
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
334
344
  SqlResultValidator(
335
345
  session=session,
@@ -342,11 +352,12 @@ class AffinityPropagation(BaseTransformer):
342
352
  expected_value=f"Stage area {transform_stage_name} successfully created."
343
353
  ).validate()
344
354
 
345
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
355
+ # Use posixpath to construct stage paths
356
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
357
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
346
358
  local_result_file_name = get_temp_file_path()
347
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
348
359
 
349
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
360
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
350
361
  statement_params = telemetry.get_function_usage_statement_params(
351
362
  project=_PROJECT,
352
363
  subproject=_SUBPROJECT,
@@ -372,6 +383,7 @@ class AffinityPropagation(BaseTransformer):
372
383
  replace=True,
373
384
  session=session,
374
385
  statement_params=statement_params,
386
+ anonymous=True
375
387
  )
376
388
  def fit_wrapper_sproc(
377
389
  session: Session,
@@ -380,7 +392,8 @@ class AffinityPropagation(BaseTransformer):
380
392
  stage_result_file_name: str,
381
393
  input_cols: List[str],
382
394
  label_cols: List[str],
383
- sample_weight_col: Optional[str]
395
+ sample_weight_col: Optional[str],
396
+ statement_params: Dict[str, str]
384
397
  ) -> str:
385
398
  import cloudpickle as cp
386
399
  import numpy as np
@@ -447,15 +460,15 @@ class AffinityPropagation(BaseTransformer):
447
460
  api_calls=[Session.call],
448
461
  custom_tags=dict([("autogen", True)]),
449
462
  )
450
- sproc_export_file_name = session.call(
451
- fit_sproc_name,
463
+ sproc_export_file_name = fit_wrapper_sproc(
464
+ session,
452
465
  query,
453
466
  stage_transform_file_name,
454
467
  stage_result_file_name,
455
468
  identifier.get_unescaped_names(self.input_cols),
456
469
  identifier.get_unescaped_names(self.label_cols),
457
470
  identifier.get_unescaped_names(self.sample_weight_col),
458
- statement_params=statement_params,
471
+ statement_params,
459
472
  )
460
473
 
461
474
  if "|" in sproc_export_file_name:
@@ -465,7 +478,7 @@ class AffinityPropagation(BaseTransformer):
465
478
  print("\n".join(fields[1:]))
466
479
 
467
480
  session.file.get(
468
- os.path.join(stage_result_file_name, sproc_export_file_name),
481
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
469
482
  local_result_file_name,
470
483
  statement_params=statement_params
471
484
  )
@@ -511,7 +524,7 @@ class AffinityPropagation(BaseTransformer):
511
524
 
512
525
  # Register vectorized UDF for batch inference
513
526
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
514
- safe_id=self.id, method=inference_method)
527
+ safe_id=self._get_rand_id(), method=inference_method)
515
528
 
516
529
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
517
530
  # will try to pickle all of self which fails.
@@ -603,7 +616,7 @@ class AffinityPropagation(BaseTransformer):
603
616
  return transformed_pandas_df.to_dict("records")
604
617
 
605
618
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
606
- safe_id=self.id
619
+ safe_id=self._get_rand_id()
607
620
  )
608
621
 
609
622
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -770,11 +783,18 @@ class AffinityPropagation(BaseTransformer):
770
783
  Transformed dataset.
771
784
  """
772
785
  if isinstance(dataset, DataFrame):
786
+ expected_type_inferred = ""
787
+ # when it is classifier, infer the datatype from label columns
788
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
789
+ expected_type_inferred = convert_sp_to_sf_type(
790
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
791
+ )
792
+
773
793
  output_df = self._batch_inference(
774
794
  dataset=dataset,
775
795
  inference_method="predict",
776
796
  expected_output_cols_list=self.output_cols,
777
- expected_output_cols_type="",
797
+ expected_output_cols_type=expected_type_inferred,
778
798
  )
779
799
  elif isinstance(dataset, pd.DataFrame):
780
800
  output_df = self._sklearn_inference(
@@ -845,10 +865,10 @@ class AffinityPropagation(BaseTransformer):
845
865
 
846
866
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
847
867
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
848
- Returns an empty list if current object is not a classifier or not yet fitted.
868
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
849
869
  """
850
870
  if getattr(self._sklearn_object, "classes_", None) is None:
851
- return []
871
+ return [output_cols_prefix]
852
872
 
853
873
  classes = self._sklearn_object.classes_
854
874
  if isinstance(classes, numpy.ndarray):
@@ -1073,7 +1093,7 @@ class AffinityPropagation(BaseTransformer):
1073
1093
  cp.dump(self._sklearn_object, local_score_file)
1074
1094
 
1075
1095
  # Create temp stage to run score.
1076
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1096
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1077
1097
  session = dataset._session
1078
1098
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1079
1099
  SqlResultValidator(
@@ -1087,8 +1107,9 @@ class AffinityPropagation(BaseTransformer):
1087
1107
  expected_value=f"Stage area {score_stage_name} successfully created."
1088
1108
  ).validate()
1089
1109
 
1090
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1091
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1110
+ # Use posixpath to construct stage paths
1111
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1112
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1092
1113
  statement_params = telemetry.get_function_usage_statement_params(
1093
1114
  project=_PROJECT,
1094
1115
  subproject=_SUBPROJECT,
@@ -1114,6 +1135,7 @@ class AffinityPropagation(BaseTransformer):
1114
1135
  replace=True,
1115
1136
  session=session,
1116
1137
  statement_params=statement_params,
1138
+ anonymous=True
1117
1139
  )
1118
1140
  def score_wrapper_sproc(
1119
1141
  session: Session,
@@ -1121,7 +1143,8 @@ class AffinityPropagation(BaseTransformer):
1121
1143
  stage_score_file_name: str,
1122
1144
  input_cols: List[str],
1123
1145
  label_cols: List[str],
1124
- sample_weight_col: Optional[str]
1146
+ sample_weight_col: Optional[str],
1147
+ statement_params: Dict[str, str]
1125
1148
  ) -> float:
1126
1149
  import cloudpickle as cp
1127
1150
  import numpy as np
@@ -1171,14 +1194,14 @@ class AffinityPropagation(BaseTransformer):
1171
1194
  api_calls=[Session.call],
1172
1195
  custom_tags=dict([("autogen", True)]),
1173
1196
  )
1174
- score = session.call(
1175
- score_sproc_name,
1197
+ score = score_wrapper_sproc(
1198
+ session,
1176
1199
  query,
1177
1200
  stage_score_file_name,
1178
1201
  identifier.get_unescaped_names(self.input_cols),
1179
1202
  identifier.get_unescaped_names(self.label_cols),
1180
1203
  identifier.get_unescaped_names(self.sample_weight_col),
1181
- statement_params=statement_params,
1204
+ statement_params,
1182
1205
  )
1183
1206
 
1184
1207
  cleanup_temp_files([local_score_file_name])
@@ -1196,18 +1219,20 @@ class AffinityPropagation(BaseTransformer):
1196
1219
  if self._sklearn_object._estimator_type == 'classifier':
1197
1220
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1198
1221
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1199
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1222
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1223
+ ([] if self._drop_input_cols else inputs) + outputs)
1200
1224
  # For regressor, the type of predict is float64
1201
1225
  elif self._sklearn_object._estimator_type == 'regressor':
1202
1226
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1203
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1204
-
1227
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1228
+ ([] if self._drop_input_cols else inputs) + outputs)
1205
1229
  for prob_func in PROB_FUNCTIONS:
1206
1230
  if hasattr(self, prob_func):
1207
1231
  output_cols_prefix: str = f"{prob_func}_"
1208
1232
  output_column_names = self._get_output_column_names(output_cols_prefix)
1209
1233
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1210
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1234
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1235
+ ([] if self._drop_input_cols else inputs) + outputs)
1211
1236
 
1212
1237
  @property
1213
1238
  def model_signatures(self) -> Dict[str, ModelSignature]: