snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (196)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +35 -40
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/identifier.py +74 -7
  5. snowflake/ml/_internal/utils/uri.py +7 -2
  6. snowflake/ml/model/_core_requirements.py +1 -1
  7. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  8. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  9. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  10. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  11. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  12. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  14. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  15. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  16. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  17. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  18. snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
  19. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
  20. snowflake/ml/model/_deployer.py +14 -27
  21. snowflake/ml/model/_env.py +4 -4
  22. snowflake/ml/model/_handlers/_base.py +3 -1
  23. snowflake/ml/model/_handlers/custom.py +14 -2
  24. snowflake/ml/model/_handlers/pytorch.py +186 -0
  25. snowflake/ml/model/_handlers/sklearn.py +14 -8
  26. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  27. snowflake/ml/model/_handlers/torchscript.py +180 -0
  28. snowflake/ml/model/_handlers/xgboost.py +19 -9
  29. snowflake/ml/model/_model.py +27 -21
  30. snowflake/ml/model/_model_meta.py +33 -19
  31. snowflake/ml/model/model_signature.py +446 -66
  32. snowflake/ml/model/type_hints.py +28 -15
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
  36. snowflake/ml/modeling/cluster/birch.py +79 -43
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
  38. snowflake/ml/modeling/cluster/dbscan.py +79 -43
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
  40. snowflake/ml/modeling/cluster/k_means.py +79 -43
  41. snowflake/ml/modeling/cluster/mean_shift.py +79 -43
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
  43. snowflake/ml/modeling/cluster/optics.py +79 -43
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
  47. snowflake/ml/modeling/compose/column_transformer.py +79 -43
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
  54. snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
  55. snowflake/ml/modeling/covariance/oas.py +79 -43
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
  59. snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
  64. snowflake/ml/modeling/decomposition/pca.py +79 -43
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
  95. snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
  96. snowflake/ml/modeling/impute/knn_imputer.py +79 -43
  97. snowflake/ml/modeling/impute/missing_indicator.py +79 -43
  98. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
  99. snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
  100. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
  101. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
  102. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
  103. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
  104. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
  105. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
  106. snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
  107. snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
  108. snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
  109. snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
  110. snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
  111. snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
  112. snowflake/ml/modeling/linear_model/lars.py +79 -43
  113. snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
  114. snowflake/ml/modeling/linear_model/lasso.py +79 -43
  115. snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
  116. snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
  117. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
  118. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
  119. snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
  120. snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
  121. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
  123. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
  124. snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
  125. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
  126. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
  127. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
  128. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
  129. snowflake/ml/modeling/linear_model/perceptron.py +79 -43
  130. snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
  131. snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
  132. snowflake/ml/modeling/linear_model/ridge.py +79 -43
  133. snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
  134. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
  135. snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
  136. snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
  137. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
  138. snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
  139. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
  140. snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
  141. snowflake/ml/modeling/manifold/isomap.py +79 -43
  142. snowflake/ml/modeling/manifold/mds.py +79 -43
  143. snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
  144. snowflake/ml/modeling/manifold/tsne.py +79 -43
  145. snowflake/ml/modeling/metrics/classification.py +6 -1
  146. snowflake/ml/modeling/metrics/regression.py +517 -9
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
  161. snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
  171. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  172. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  173. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  174. snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
  177. snowflake/ml/modeling/svm/linear_svc.py +79 -43
  178. snowflake/ml/modeling/svm/linear_svr.py +79 -43
  179. snowflake/ml/modeling/svm/nu_svc.py +79 -43
  180. snowflake/ml/modeling/svm/nu_svr.py +79 -43
  181. snowflake/ml/modeling/svm/svc.py +79 -43
  182. snowflake/ml/modeling/svm/svr.py +79 -43
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
  191. snowflake/ml/registry/model_registry.py +123 -121
  192. snowflake/ml/version.py +1 -1
  193. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
  194. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  195. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  196. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/model/type_hints.py

@@ -4,17 +4,18 @@ from typing import TYPE_CHECKING, Sequence, TypedDict, TypeVar, Union
 import numpy.typing as npt
 from typing_extensions import NotRequired, TypeAlias

-from snowflake.ml.modeling.framework import base
-
 if TYPE_CHECKING:
     import numpy as np
     import pandas as pd
     import sklearn.base
     import sklearn.pipeline
+    import tensorflow
+    import torch
     import xgboost

     import snowflake.ml.model.custom_model
     import snowflake.snowpark
+    from snowflake.ml.modeling.framework import base  # noqa: F401


 _SupportedBuiltins = Union[int, float, bool, str, bytes, "_SupportedBuiltinsList"]
@@ -35,9 +36,10 @@ _SupportedNumpyDtype = Union[
 ]
 _SupportedNumpyArray = npt.NDArray[_SupportedNumpyDtype]
 _SupportedBuiltinsList = Sequence[_SupportedBuiltins]
+_SupportedArrayLike = Union[_SupportedNumpyArray, "torch.Tensor", "tensorflow.Tensor", "tensorflow.Variable"]

 SupportedLocalDataType = Union[
-    "pd.DataFrame", _SupportedNumpyArray, Sequence[_SupportedNumpyArray], _SupportedBuiltinsList
+    "pd.DataFrame", _SupportedNumpyArray, Sequence[_SupportedArrayLike], _SupportedBuiltinsList
 ]

 SupportedDataType = Union[SupportedLocalDataType, "snowflake.snowpark.DataFrame"]
@@ -52,9 +54,11 @@ SupportedLocalModelType = Union[
     "sklearn.pipeline.Pipeline",
     "xgboost.XGBModel",
     "xgboost.Booster",
+    "torch.nn.Module",
+    "torch.jit.ScriptModule",  # type:ignore[name-defined]
 ]

-SupportedSnowMLModelType: TypeAlias = base.BaseEstimator
+SupportedSnowMLModelType: TypeAlias = "base.BaseEstimator"

 SupportedModelType = Union[
     SupportedLocalModelType,
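Because the `base` import now lives under `if TYPE_CHECKING:`, the name no longer exists at runtime, which is why `SupportedSnowMLModelType` must become the string forward reference `"base.BaseEstimator"`. A minimal, self-contained sketch of the same deferred-import pattern (the module and alias names below are illustrative, not taken from the package):

```python
from typing import TYPE_CHECKING, Union

if TYPE_CHECKING:
    # Only imported for static type checking; absent at runtime.
    import pandas as pd

# The alias must quote the name, otherwise `pd` would be undefined at import time.
SupportedFrame = Union["pd.DataFrame", list]


def row_count(data: "SupportedFrame") -> int:
    return len(data)


print(row_count([1, 2, 3]))  # -> 3
```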
@@ -71,6 +75,8 @@ Here is all acceptable types of Snowflake native model packaging and its handler
 | xgboost.XGBModel | xgboost.py | _XGBModelHandler |
 | xgboost.Booster | xgboost.py | _XGBModelHandler |
 | snowflake.ml.framework.base.BaseEstimator | snowmlmodel.py | _SnowMLModelHandler |
+| torch.nn.Module | pytroch.py | _PyTorchHandler |
+| torch.jit.ScriptModule | torchscript.py | _TorchScripthHandler |
 """


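The two new table rows correspond to the two new entries in `SupportedLocalModelType` above: eager `torch.nn.Module` objects and compiled `torch.jit.ScriptModule` objects. A quick illustration of the distinction the two handlers target (the model is a toy; the save/registry entry points that dispatch to these handlers are outside this hunk):

```python
import torch


class TinyModel(torch.nn.Module):
    """A plain eager-mode module (handled as torch.nn.Module)."""

    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(4, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear(x)


eager_model = TinyModel()                       # torch.nn.Module
scripted_model = torch.jit.script(eager_model)  # torch.jit.ScriptModule

print(isinstance(scripted_model, torch.jit.ScriptModule))  # -> True
```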
@@ -80,26 +86,23 @@ _ModelType = TypeVar("_ModelType", bound=SupportedModelType)
 class DeployOptions(TypedDict):
     """Common Options for deploying to Snowflake.

-    output_with_input_features: Whether or not preserve the input columns in the output when predicting.
-        Defaults to False.
+    disable_local_conda_resolver: Set to disable use local conda resolver to do pre-check on environment and rely on
+        the information schema only. Defaults to False.
     keep_order: Whether or not preserve the row order when predicting. Only available for dataframe has fewer than 2**64
         rows. Defaults to True.
-
-    Internal-only options
-    _use_local_snowml: Use local SnowML when as the execution library of the deployment. If set to True, local SnowML
-        would be packed and uploaded to 1) session stage, if it is a temporary deployment, or 2) the provided stage path
-        if it is a permanent deployment. It should be set to True before SnowML available in Snowflake Anaconda Channel.
-        Default to False.
+    output_with_input_features: Whether or not preserve the input columns in the output when predicting.
+        Defaults to False.
     """

-    _use_local_snowml: NotRequired[bool]
-    output_with_input_features: NotRequired[bool]
+    disable_local_conda_resolver: NotRequired[bool]
     keep_order: NotRequired[bool]
+    output_with_input_features: NotRequired[bool]


 class WarehouseDeployOptions(DeployOptions):
     """Options for deploying to the Snowflake Warehouse.

+
     permanent_udf_stage_location: A Snowflake stage option where the UDF should be persisted. If specified, the model
         will be deployed as a permanent UDF, otherwise temporary.
     relax_version: Whether or not relax the version constraints of the dependencies if unresolvable. Defaults to False.
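All three `DeployOptions` keys are `NotRequired`, so callers pass a plain dict and a type checker verifies key names and value types; the removed internal `_use_local_snowml` key would now be rejected. A minimal sketch of how such an options dict is built (it is only constructed here, since the deploy entry point that consumes it is not part of this hunk):

```python
from typing import TypedDict

from typing_extensions import NotRequired


class DeployOptions(TypedDict):
    """Simplified mirror of the keys declared above (docstrings omitted)."""

    disable_local_conda_resolver: NotRequired[bool]
    keep_order: NotRequired[bool]
    output_with_input_features: NotRequired[bool]


# Every key is optional; unknown keys such as `_use_local_snowml` fail type checking.
options: DeployOptions = {
    "keep_order": True,
    "output_with_input_features": False,
}
print(options.get("disable_local_conda_resolver", False))  # -> False
```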
@@ -115,14 +118,16 @@ class WarehouseDeployOptions(DeployOptions):
 class ModelSaveOption(TypedDict):
     """Options for saving the model.

+    embed_local_ml_library: Embedding local SnowML into the code directory of the folder.
     allow_overwritten_stage_file: Flag to indicate when saving the model as a stage file, whether overwriting existed
         file is allowed. Default to False.
     """

+    embed_local_ml_library: NotRequired[bool]
     allow_overwritten_stage_file: NotRequired[bool]


-class CustomModelSaveOption(TypedDict):
+class CustomModelSaveOption(ModelSaveOption):
     ...


@@ -136,3 +141,11 @@ class XGBModelSaveOptions(ModelSaveOption):


 class SNOWModelSaveOptions(ModelSaveOption):
     target_methods: NotRequired[Sequence[str]]
+
+
+class PyTorchSaveOptions(ModelSaveOption):
+    target_methods: NotRequired[Sequence[str]]
+
+
+class TorchScriptSaveOptions(ModelSaveOption):
+    target_methods: NotRequired[Sequence[str]]
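With `CustomModelSaveOption` now inheriting from `ModelSaveOption`, every per-framework save-option class, including the new `PyTorchSaveOptions` and `TorchScriptSaveOptions`, shares the common `embed_local_ml_library` and `allow_overwritten_stage_file` keys on top of its own `target_methods`. A small sketch of how the inherited and framework-specific keys combine (only constructed here; the save API that accepts the dict is outside this diff, and the values are illustrative):

```python
from typing import Sequence, TypedDict

from typing_extensions import NotRequired


class ModelSaveOption(TypedDict):
    embed_local_ml_library: NotRequired[bool]
    allow_overwritten_stage_file: NotRequired[bool]


class PyTorchSaveOptions(ModelSaveOption):
    target_methods: NotRequired[Sequence[str]]


# Inherited keys and the framework-specific key can be mixed in one dict.
pytorch_opts: PyTorchSaveOptions = {
    "embed_local_ml_library": True,
    "target_methods": ["forward"],
}
print(sorted(pytorch_opts))  # -> ['embed_local_ml_library', 'target_methods']
```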
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4

@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

 from snowflake.ml.model.model_signature import (
     DataType,
@@ -247,7 +249,6 @@ class CalibratedClassifierCV(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | _gather_dependencies(estimator)
         deps = deps | _gather_dependencies(base_estimator)
@@ -274,6 +275,15 @@ class CalibratedClassifierCV(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)

+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
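The new `_get_rand_id` helper replaces the single `self.id` assigned in `__init__`: instead of one identifier fixed per estimator instance, every stage, sproc and UDF registration now draws a fresh suffix, so object names are not reused across calls on the same estimator. The helper is small enough to reproduce standalone:

```python
from uuid import uuid4


def get_rand_id() -> str:
    """Random id usable in sproc, table, and stage names (mirrors _get_rand_id)."""
    return str(uuid4()).replace("-", "_").upper()


# Each call produces a distinct, SQL-identifier-friendly suffix.
print("SNOWML_TRANSFORM_{safe_id}".format(safe_id=get_rand_id()))
print("SNOWML_TRANSFORM_{safe_id}".format(safe_id=get_rand_id()))
```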
@@ -352,7 +362,7 @@ class CalibratedClassifierCV(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)

         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -365,11 +375,12 @@ class CalibratedClassifierCV(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()

-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))

-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
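Stage paths are Snowflake object paths and always use forward slashes, whereas `os.path.join` follows the local OS convention and would emit backslashes on Windows; `posixpath.join` behaves the same on every platform. A quick comparison (using `ntpath` to reproduce the Windows behaviour of `os.path.join` portably):

```python
import ntpath  # what os.path resolves to on Windows; importable everywhere
import posixpath

stage_name = "SNOWML_TRANSFORM_ABC123"
file_name = "model.pkl.gz"

print(posixpath.join(stage_name, file_name))  # SNOWML_TRANSFORM_ABC123/model.pkl.gz on every OS
print(ntpath.join(stage_name, file_name))     # SNOWML_TRANSFORM_ABC123\model.pkl.gz (Windows-style)
```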
@@ -395,6 +406,7 @@ class CalibratedClassifierCV(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -403,7 +415,8 @@ class CalibratedClassifierCV(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -470,15 +483,15 @@ class CalibratedClassifierCV(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         if "|" in sproc_export_file_name:
@@ -488,7 +501,7 @@ class CalibratedClassifierCV(BaseTransformer):
             print("\n".join(fields[1:]))

         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -534,7 +547,7 @@ class CalibratedClassifierCV(BaseTransformer):

         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)

         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -626,7 +639,7 @@ class CalibratedClassifierCV(BaseTransformer):
             return transformed_pandas_df.to_dict("records")

         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )

         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -682,26 +695,37 @@ class CalibratedClassifierCV(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

         estimator = self._sklearn_object

-        input_df = dataset[input_cols]  # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols  # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator

         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
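The rewritten block tries each feature the estimator expects against the dataset columns in three spellings (the input column as given, its unescaped form, and its re-escaped form) before selecting and renaming, rather than assuming the quoted spelling is present in the dataframe. A condensed standalone version of that matching logic; `quote`/`unquote` are toy stand-ins for `identifier.get_escaped_names` / `identifier.get_unescaped_names`:

```python
from typing import List

import pandas as pd


def unquote(name: str) -> str:
    # Toy stand-in for identifier.get_unescaped_names on a single column.
    return name.strip('"')


def quote(name: str) -> str:
    # Toy stand-in for identifier.get_escaped_names on a single column.
    return f'"{name}"'


def select_features(df: pd.DataFrame, input_cols: List[str], required: List[str]) -> pd.DataFrame:
    available = set(df.columns)
    selected: List[str] = []
    missing: List[str] = []
    for i, feature in enumerate(required):
        # Accept the column under any of its three spellings.
        candidates = [] if i >= len(input_cols) else [
            input_cols[i], unquote(input_cols[i]), quote(unquote(input_cols[i]))
        ]
        hit = next((c for c in candidates if feature in (c, unquote(c)) and c in available), None)
        if hit is None:
            missing.append(feature)
        else:
            selected.append(hit)
    if missing:
        raise ValueError(f"Features seen during fit but not present in the input: {missing}")
    out = df[selected].copy()
    out.columns = list(required)  # rename to the names the estimator was fitted with
    return out


df = pd.DataFrame({"SEPAL_LEN": [5.1], "SEPAL_WID": [3.5]})
print(select_features(df, ['"SEPAL_LEN"', '"SEPAL_WID"'], ["SEPAL_LEN", "SEPAL_WID"]))
```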
@@ -782,11 +806,18 @@ class CalibratedClassifierCV(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -857,10 +888,10 @@ class CalibratedClassifierCV(BaseTransformer):

     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns an empty list if current object is not a classifier or not yet fitted.
+            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]

         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1089,7 +1120,7 @@ class CalibratedClassifierCV(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)

         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1103,8 +1134,9 @@ class CalibratedClassifierCV(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()

-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1130,6 +1162,7 @@ class CalibratedClassifierCV(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1137,7 +1170,8 @@ class CalibratedClassifierCV(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1187,14 +1221,14 @@ class CalibratedClassifierCV(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )

         cleanup_temp_files([local_score_file_name])
@@ -1212,18 +1246,20 @@ class CalibratedClassifierCV(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output")  # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols)  # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)

     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
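The signature change means that unless `_drop_input_cols` is set, the declared output schema of each method now echoes the input features ahead of the prediction columns, matching the dataframe the deployed UDF actually returns. A minimal sketch of the resulting signature shape using the same `ModelSignature`/`FeatureSpec` classes imported above (the column names are illustrative):

```python
from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

inputs = [
    FeatureSpec(dtype=DataType.DOUBLE, name="SEPAL_LEN"),
    FeatureSpec(dtype=DataType.DOUBLE, name="SEPAL_WID"),
]
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_PREDICT")]

drop_input_cols = False  # plays the role of self._drop_input_cols

# With drop_input_cols=False the outputs echo the inputs followed by the
# prediction column; with True, only the prediction column is declared.
signature = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
print(len(signature.outputs))  # -> 3 (two echoed inputs + one prediction column)
```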