snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/model/type_hints.py
CHANGED
@@ -4,17 +4,18 @@ from typing import TYPE_CHECKING, Sequence, TypedDict, TypeVar, Union
|
|
4
4
|
import numpy.typing as npt
|
5
5
|
from typing_extensions import NotRequired, TypeAlias
|
6
6
|
|
7
|
-
from snowflake.ml.modeling.framework import base
|
8
|
-
|
9
7
|
if TYPE_CHECKING:
|
10
8
|
import numpy as np
|
11
9
|
import pandas as pd
|
12
10
|
import sklearn.base
|
13
11
|
import sklearn.pipeline
|
12
|
+
import tensorflow
|
13
|
+
import torch
|
14
14
|
import xgboost
|
15
15
|
|
16
16
|
import snowflake.ml.model.custom_model
|
17
17
|
import snowflake.snowpark
|
18
|
+
from snowflake.ml.modeling.framework import base # noqa: F401
|
18
19
|
|
19
20
|
|
20
21
|
_SupportedBuiltins = Union[int, float, bool, str, bytes, "_SupportedBuiltinsList"]
|
@@ -35,9 +36,10 @@ _SupportedNumpyDtype = Union[
|
|
35
36
|
]
|
36
37
|
_SupportedNumpyArray = npt.NDArray[_SupportedNumpyDtype]
|
37
38
|
_SupportedBuiltinsList = Sequence[_SupportedBuiltins]
|
39
|
+
_SupportedArrayLike = Union[_SupportedNumpyArray, "torch.Tensor", "tensorflow.Tensor", "tensorflow.Variable"]
|
38
40
|
|
39
41
|
SupportedLocalDataType = Union[
|
40
|
-
"pd.DataFrame", _SupportedNumpyArray, Sequence[
|
42
|
+
"pd.DataFrame", _SupportedNumpyArray, Sequence[_SupportedArrayLike], _SupportedBuiltinsList
|
41
43
|
]
|
42
44
|
|
43
45
|
SupportedDataType = Union[SupportedLocalDataType, "snowflake.snowpark.DataFrame"]
|
@@ -52,9 +54,11 @@ SupportedLocalModelType = Union[
|
|
52
54
|
"sklearn.pipeline.Pipeline",
|
53
55
|
"xgboost.XGBModel",
|
54
56
|
"xgboost.Booster",
|
57
|
+
"torch.nn.Module",
|
58
|
+
"torch.jit.ScriptModule", # type:ignore[name-defined]
|
55
59
|
]
|
56
60
|
|
57
|
-
SupportedSnowMLModelType: TypeAlias = base.BaseEstimator
|
61
|
+
SupportedSnowMLModelType: TypeAlias = "base.BaseEstimator"
|
58
62
|
|
59
63
|
SupportedModelType = Union[
|
60
64
|
SupportedLocalModelType,
|
@@ -71,6 +75,8 @@ Here is all acceptable types of Snowflake native model packaging and its handler
|
|
71
75
|
| xgboost.XGBModel | xgboost.py | _XGBModelHandler |
|
72
76
|
| xgboost.Booster | xgboost.py | _XGBModelHandler |
|
73
77
|
| snowflake.ml.framework.base.BaseEstimator | snowmlmodel.py | _SnowMLModelHandler |
|
78
|
+
| torch.nn.Module | pytorch.py | _PyTorchHandler |
|
79
|
+
| torch.jit.ScriptModule | torchscript.py | _TorchScriptHandler |
|
74
80
|
"""
|
75
81
|
|
76
82
|
|
@@ -80,26 +86,23 @@ _ModelType = TypeVar("_ModelType", bound=SupportedModelType)
|
|
80
86
|
class DeployOptions(TypedDict):
|
81
87
|
"""Common Options for deploying to Snowflake.
|
82
88
|
|
83
|
-
|
84
|
-
Defaults to False.
|
89
|
+
disable_local_conda_resolver: Set to disable using the local conda resolver to pre-check the environment and rely on
|
90
|
+
the information schema only. Defaults to False.
|
85
91
|
keep_order: Whether or not to preserve the row order when predicting. Only available for dataframes with fewer than 2**64
|
86
92
|
rows. Defaults to True.
|
87
|
-
|
88
|
-
|
89
|
-
_use_local_snowml: Use local SnowML when as the execution library of the deployment. If set to True, local SnowML
|
90
|
-
would be packed and uploaded to 1) session stage, if it is a temporary deployment, or 2) the provided stage path
|
91
|
-
if it is a permanent deployment. It should be set to True before SnowML available in Snowflake Anaconda Channel.
|
92
|
-
Default to False.
|
93
|
+
output_with_input_features: Whether or not to preserve the input columns in the output when predicting.
|
94
|
+
Defaults to False.
|
93
95
|
"""
|
94
96
|
|
95
|
-
|
96
|
-
output_with_input_features: NotRequired[bool]
|
97
|
+
disable_local_conda_resolver: NotRequired[bool]
|
97
98
|
keep_order: NotRequired[bool]
|
99
|
+
output_with_input_features: NotRequired[bool]
|
98
100
|
|
99
101
|
|
100
102
|
class WarehouseDeployOptions(DeployOptions):
|
101
103
|
"""Options for deploying to the Snowflake Warehouse.
|
102
104
|
|
105
|
+
|
103
106
|
permanent_udf_stage_location: A Snowflake stage option where the UDF should be persisted. If specified, the model
|
104
107
|
will be deployed as a permanent UDF, otherwise temporary.
|
105
108
|
relax_version: Whether or not to relax the version constraints of the dependencies if unresolvable. Defaults to False.
|
@@ -115,14 +118,16 @@ class WarehouseDeployOptions(DeployOptions):
|
|
115
118
|
class ModelSaveOption(TypedDict):
|
116
119
|
"""Options for saving the model.
|
117
120
|
|
121
|
+
embed_local_ml_library: Embedding local SnowML into the code directory of the folder.
|
118
122
|
allow_overwritten_stage_file: Flag to indicate, when saving the model as a stage file, whether overwriting an existing
|
119
123
|
file is allowed. Default to False.
|
120
124
|
"""
|
121
125
|
|
126
|
+
embed_local_ml_library: NotRequired[bool]
|
122
127
|
allow_overwritten_stage_file: NotRequired[bool]
|
123
128
|
|
124
129
|
|
125
|
-
class CustomModelSaveOption(
|
130
|
+
class CustomModelSaveOption(ModelSaveOption):
|
126
131
|
...
|
127
132
|
|
128
133
|
|
@@ -136,3 +141,11 @@ class XGBModelSaveOptions(ModelSaveOption):
|
|
136
141
|
|
137
142
|
class SNOWModelSaveOptions(ModelSaveOption):
|
138
143
|
target_methods: NotRequired[Sequence[str]]
|
144
|
+
|
145
|
+
|
146
|
+
class PyTorchSaveOptions(ModelSaveOption):
|
147
|
+
target_methods: NotRequired[Sequence[str]]
|
148
|
+
|
149
|
+
|
150
|
+
class TorchScriptSaveOptions(ModelSaveOption):
|
151
|
+
target_methods: NotRequired[Sequence[str]]
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -247,7 +249,6 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
247
249
|
sample_weight_col: Optional[str] = None,
|
248
250
|
) -> None:
|
249
251
|
super().__init__()
|
250
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
251
252
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
252
253
|
deps = deps | _gather_dependencies(estimator)
|
253
254
|
deps = deps | _gather_dependencies(base_estimator)
|
@@ -274,6 +275,15 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
274
275
|
self.set_drop_input_cols(drop_input_cols)
|
275
276
|
self.set_sample_weight_col(sample_weight_col)
|
276
277
|
|
278
|
+
def _get_rand_id(self) -> str:
|
279
|
+
"""
|
280
|
+
Generate random id to be used in sproc and stage names.
|
281
|
+
|
282
|
+
Returns:
|
283
|
+
Random id string usable in sproc, table, and stage names.
|
284
|
+
"""
|
285
|
+
return str(uuid4()).replace("-", "_").upper()
|
286
|
+
|
277
287
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
278
288
|
"""
|
279
289
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -352,7 +362,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
352
362
|
cp.dump(self._sklearn_object, local_transform_file)
|
353
363
|
|
354
364
|
# Create temp stage to run fit.
|
355
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
365
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
356
366
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
357
367
|
SqlResultValidator(
|
358
368
|
session=session,
|
@@ -365,11 +375,12 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
365
375
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
366
376
|
).validate()
|
367
377
|
|
368
|
-
|
378
|
+
# Use posixpath to construct stage paths
|
379
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
380
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
369
381
|
local_result_file_name = get_temp_file_path()
|
370
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
371
382
|
|
372
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
383
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
373
384
|
statement_params = telemetry.get_function_usage_statement_params(
|
374
385
|
project=_PROJECT,
|
375
386
|
subproject=_SUBPROJECT,
|
@@ -395,6 +406,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
395
406
|
replace=True,
|
396
407
|
session=session,
|
397
408
|
statement_params=statement_params,
|
409
|
+
anonymous=True
|
398
410
|
)
|
399
411
|
def fit_wrapper_sproc(
|
400
412
|
session: Session,
|
@@ -403,7 +415,8 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
403
415
|
stage_result_file_name: str,
|
404
416
|
input_cols: List[str],
|
405
417
|
label_cols: List[str],
|
406
|
-
sample_weight_col: Optional[str]
|
418
|
+
sample_weight_col: Optional[str],
|
419
|
+
statement_params: Dict[str, str]
|
407
420
|
) -> str:
|
408
421
|
import cloudpickle as cp
|
409
422
|
import numpy as np
|
@@ -470,15 +483,15 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
470
483
|
api_calls=[Session.call],
|
471
484
|
custom_tags=dict([("autogen", True)]),
|
472
485
|
)
|
473
|
-
sproc_export_file_name =
|
474
|
-
|
486
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
487
|
+
session,
|
475
488
|
query,
|
476
489
|
stage_transform_file_name,
|
477
490
|
stage_result_file_name,
|
478
491
|
identifier.get_unescaped_names(self.input_cols),
|
479
492
|
identifier.get_unescaped_names(self.label_cols),
|
480
493
|
identifier.get_unescaped_names(self.sample_weight_col),
|
481
|
-
statement_params
|
494
|
+
statement_params,
|
482
495
|
)
|
483
496
|
|
484
497
|
if "|" in sproc_export_file_name:
|
@@ -488,7 +501,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
488
501
|
print("\n".join(fields[1:]))
|
489
502
|
|
490
503
|
session.file.get(
|
491
|
-
|
504
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
492
505
|
local_result_file_name,
|
493
506
|
statement_params=statement_params
|
494
507
|
)
|
@@ -534,7 +547,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
534
547
|
|
535
548
|
# Register vectorized UDF for batch inference
|
536
549
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
537
|
-
safe_id=self.
|
550
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
538
551
|
|
539
552
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
540
553
|
# will try to pickle all of self which fails.
|
@@ -626,7 +639,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
626
639
|
return transformed_pandas_df.to_dict("records")
|
627
640
|
|
628
641
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
629
|
-
safe_id=self.
|
642
|
+
safe_id=self._get_rand_id()
|
630
643
|
)
|
631
644
|
|
632
645
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -682,26 +695,37 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
682
695
|
# input cols need to match unquoted / quoted
|
683
696
|
input_cols = self.input_cols
|
684
697
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
698
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
685
699
|
|
686
700
|
estimator = self._sklearn_object
|
687
701
|
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
702
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
703
|
+
missing_features = []
|
704
|
+
features_in_dataset = set(dataset.columns)
|
705
|
+
columns_to_select = []
|
706
|
+
for i, f in enumerate(features_required_by_estimator):
|
707
|
+
if (
|
708
|
+
i >= len(input_cols)
|
709
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
710
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
711
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
712
|
+
):
|
713
|
+
missing_features.append(f)
|
714
|
+
elif input_cols[i] in features_in_dataset:
|
715
|
+
columns_to_select.append(input_cols[i])
|
716
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
717
|
+
columns_to_select.append(unquoted_input_cols[i])
|
718
|
+
else:
|
719
|
+
columns_to_select.append(quoted_input_cols[i])
|
720
|
+
|
721
|
+
if len(missing_features) > 0:
|
722
|
+
raise ValueError(
|
723
|
+
"The feature names should match with those that were passed during fit.\n"
|
724
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
725
|
+
f"Features in the input dataframe : {input_cols}\n"
|
726
|
+
)
|
727
|
+
input_df = dataset[columns_to_select]
|
728
|
+
input_df.columns = features_required_by_estimator
|
705
729
|
|
706
730
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
707
731
|
input_df
|
@@ -782,11 +806,18 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
782
806
|
Transformed dataset.
|
783
807
|
"""
|
784
808
|
if isinstance(dataset, DataFrame):
|
809
|
+
expected_type_inferred = ""
|
810
|
+
# when it is classifier, infer the datatype from label columns
|
811
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
812
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
813
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
814
|
+
)
|
815
|
+
|
785
816
|
output_df = self._batch_inference(
|
786
817
|
dataset=dataset,
|
787
818
|
inference_method="predict",
|
788
819
|
expected_output_cols_list=self.output_cols,
|
789
|
-
expected_output_cols_type=
|
820
|
+
expected_output_cols_type=expected_type_inferred,
|
790
821
|
)
|
791
822
|
elif isinstance(dataset, pd.DataFrame):
|
792
823
|
output_df = self._sklearn_inference(
|
@@ -857,10 +888,10 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
857
888
|
|
858
889
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
859
890
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
860
|
-
Returns
|
891
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
861
892
|
"""
|
862
893
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
863
|
-
return []
|
894
|
+
return [output_cols_prefix]
|
864
895
|
|
865
896
|
classes = self._sklearn_object.classes_
|
866
897
|
if isinstance(classes, numpy.ndarray):
|
@@ -1089,7 +1120,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1089
1120
|
cp.dump(self._sklearn_object, local_score_file)
|
1090
1121
|
|
1091
1122
|
# Create temp stage to run score.
|
1092
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1123
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1093
1124
|
session = dataset._session
|
1094
1125
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1095
1126
|
SqlResultValidator(
|
@@ -1103,8 +1134,9 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1103
1134
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1104
1135
|
).validate()
|
1105
1136
|
|
1106
|
-
|
1107
|
-
|
1137
|
+
# Use posixpath to construct stage paths
|
1138
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1139
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1108
1140
|
statement_params = telemetry.get_function_usage_statement_params(
|
1109
1141
|
project=_PROJECT,
|
1110
1142
|
subproject=_SUBPROJECT,
|
@@ -1130,6 +1162,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1130
1162
|
replace=True,
|
1131
1163
|
session=session,
|
1132
1164
|
statement_params=statement_params,
|
1165
|
+
anonymous=True
|
1133
1166
|
)
|
1134
1167
|
def score_wrapper_sproc(
|
1135
1168
|
session: Session,
|
@@ -1137,7 +1170,8 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1137
1170
|
stage_score_file_name: str,
|
1138
1171
|
input_cols: List[str],
|
1139
1172
|
label_cols: List[str],
|
1140
|
-
sample_weight_col: Optional[str]
|
1173
|
+
sample_weight_col: Optional[str],
|
1174
|
+
statement_params: Dict[str, str]
|
1141
1175
|
) -> float:
|
1142
1176
|
import cloudpickle as cp
|
1143
1177
|
import numpy as np
|
@@ -1187,14 +1221,14 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1187
1221
|
api_calls=[Session.call],
|
1188
1222
|
custom_tags=dict([("autogen", True)]),
|
1189
1223
|
)
|
1190
|
-
score =
|
1191
|
-
|
1224
|
+
score = score_wrapper_sproc(
|
1225
|
+
session,
|
1192
1226
|
query,
|
1193
1227
|
stage_score_file_name,
|
1194
1228
|
identifier.get_unescaped_names(self.input_cols),
|
1195
1229
|
identifier.get_unescaped_names(self.label_cols),
|
1196
1230
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1197
|
-
statement_params
|
1231
|
+
statement_params,
|
1198
1232
|
)
|
1199
1233
|
|
1200
1234
|
cleanup_temp_files([local_score_file_name])
|
@@ -1212,18 +1246,20 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1212
1246
|
if self._sklearn_object._estimator_type == 'classifier':
|
1213
1247
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1214
1248
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1215
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1249
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1250
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1216
1251
|
# For regressor, the type of predict is float64
|
1217
1252
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1218
1253
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1219
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1220
|
-
|
1254
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1255
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1221
1256
|
for prob_func in PROB_FUNCTIONS:
|
1222
1257
|
if hasattr(self, prob_func):
|
1223
1258
|
output_cols_prefix: str = f"{prob_func}_"
|
1224
1259
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1225
1260
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1226
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1261
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1262
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1227
1263
|
|
1228
1264
|
@property
|
1229
1265
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|