snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +29 -7
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -9
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +3 -2
- snowflake/ml/model/_model_meta.py +12 -7
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +23 -4
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
- snowflake/ml/modeling/cluster/birch.py +51 -26
- snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
- snowflake/ml/modeling/cluster/dbscan.py +51 -26
- snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
- snowflake/ml/modeling/cluster/k_means.py +51 -26
- snowflake/ml/modeling/cluster/mean_shift.py +51 -26
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
- snowflake/ml/modeling/cluster/optics.py +51 -26
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
- snowflake/ml/modeling/compose/column_transformer.py +51 -26
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
- snowflake/ml/modeling/covariance/oas.py +51 -26
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
- snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
- snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
- snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
- snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/pca.py +51 -26
- snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
- snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
- snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
- snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
- snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
- snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
- snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
- snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
- snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
- snowflake/ml/modeling/impute/knn_imputer.py +51 -26
- snowflake/ml/modeling/impute/missing_indicator.py +51 -26
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/lars.py +51 -26
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/perceptron.py +51 -26
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/ridge.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
- snowflake/ml/modeling/manifold/isomap.py +51 -26
- snowflake/ml/modeling/manifold/mds.py +51 -26
- snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
- snowflake/ml/modeling/manifold/tsne.py +51 -26
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
- snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
- snowflake/ml/modeling/svm/linear_svc.py +51 -26
- snowflake/ml/modeling/svm/linear_svr.py +51 -26
- snowflake/ml/modeling/svm/nu_svc.py +51 -26
- snowflake/ml/modeling/svm/nu_svr.py +51 -26
- snowflake/ml/modeling/svm/svc.py +51 -26
- snowflake/ml/modeling/svm/svr.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
- snowflake/ml/registry/model_registry.py +74 -56
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/model/type_hints.py
CHANGED
@@ -9,6 +9,8 @@ if TYPE_CHECKING:
|
|
9
9
|
import pandas as pd
|
10
10
|
import sklearn.base
|
11
11
|
import sklearn.pipeline
|
12
|
+
import tensorflow
|
13
|
+
import torch
|
12
14
|
import xgboost
|
13
15
|
|
14
16
|
import snowflake.ml.model.custom_model
|
@@ -34,9 +36,10 @@ _SupportedNumpyDtype = Union[
|
|
34
36
|
]
|
35
37
|
_SupportedNumpyArray = npt.NDArray[_SupportedNumpyDtype]
|
36
38
|
_SupportedBuiltinsList = Sequence[_SupportedBuiltins]
|
39
|
+
_SupportedArrayLike = Union[_SupportedNumpyArray, "torch.Tensor", "tensorflow.Tensor", "tensorflow.Variable"]
|
37
40
|
|
38
41
|
SupportedLocalDataType = Union[
|
39
|
-
"pd.DataFrame", _SupportedNumpyArray, Sequence[
|
42
|
+
"pd.DataFrame", _SupportedNumpyArray, Sequence[_SupportedArrayLike], _SupportedBuiltinsList
|
40
43
|
]
|
41
44
|
|
42
45
|
SupportedDataType = Union[SupportedLocalDataType, "snowflake.snowpark.DataFrame"]
|
@@ -51,6 +54,8 @@ SupportedLocalModelType = Union[
|
|
51
54
|
"sklearn.pipeline.Pipeline",
|
52
55
|
"xgboost.XGBModel",
|
53
56
|
"xgboost.Booster",
|
57
|
+
"torch.nn.Module",
|
58
|
+
"torch.jit.ScriptModule", # type:ignore[name-defined]
|
54
59
|
]
|
55
60
|
|
56
61
|
SupportedSnowMLModelType: TypeAlias = "base.BaseEstimator"
|
@@ -70,6 +75,8 @@ Here is all acceptable types of Snowflake native model packaging and its handler
|
|
70
75
|
| xgboost.XGBModel | xgboost.py | _XGBModelHandler |
|
71
76
|
| xgboost.Booster | xgboost.py | _XGBModelHandler |
|
72
77
|
| snowflake.ml.framework.base.BaseEstimator | snowmlmodel.py | _SnowMLModelHandler |
|
78
|
+
| torch.nn.Module | pytorch.py | _PyTorchHandler |
|
79
|
+
| torch.jit.ScriptModule | torchscript.py | _TorchScriptHandler |
|
73
80
|
"""
|
74
81
|
|
75
82
|
|
@@ -79,19 +86,23 @@ _ModelType = TypeVar("_ModelType", bound=SupportedModelType)
|
|
79
86
|
class DeployOptions(TypedDict):
|
80
87
|
"""Common Options for deploying to Snowflake.
|
81
88
|
|
82
|
-
|
83
|
-
Defaults to False.
|
89
|
+
disable_local_conda_resolver: Set to disable using the local conda resolver for the environment pre-check and rely on
|
90
|
+
the information schema only. Defaults to False.
|
84
91
|
keep_order: Whether or not to preserve the row order when predicting. Only available for dataframes with fewer than 2**64
|
85
92
|
rows. Defaults to True.
|
93
|
+
output_with_input_features: Whether or not to preserve the input columns in the output when predicting.
|
94
|
+
Defaults to False.
|
86
95
|
"""
|
87
96
|
|
88
|
-
|
97
|
+
disable_local_conda_resolver: NotRequired[bool]
|
89
98
|
keep_order: NotRequired[bool]
|
99
|
+
output_with_input_features: NotRequired[bool]
|
90
100
|
|
91
101
|
|
92
102
|
class WarehouseDeployOptions(DeployOptions):
|
93
103
|
"""Options for deploying to the Snowflake Warehouse.
|
94
104
|
|
105
|
+
|
95
106
|
permanent_udf_stage_location: A Snowflake stage option where the UDF should be persisted. If specified, the model
|
96
107
|
will be deployed as a permanent UDF, otherwise temporary.
|
97
108
|
relax_version: Whether or not to relax the version constraints of the dependencies if unresolvable. Defaults to False.
|
@@ -130,3 +141,11 @@ class XGBModelSaveOptions(ModelSaveOption):
|
|
130
141
|
|
131
142
|
class SNOWModelSaveOptions(ModelSaveOption):
|
132
143
|
target_methods: NotRequired[Sequence[str]]
|
144
|
+
|
145
|
+
|
146
|
+
class PyTorchSaveOptions(ModelSaveOption):
|
147
|
+
target_methods: NotRequired[Sequence[str]]
|
148
|
+
|
149
|
+
|
150
|
+
class TorchScriptSaveOptions(ModelSaveOption):
|
151
|
+
target_methods: NotRequired[Sequence[str]]
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -247,7 +249,6 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
247
249
|
sample_weight_col: Optional[str] = None,
|
248
250
|
) -> None:
|
249
251
|
super().__init__()
|
250
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
251
252
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
252
253
|
deps = deps | _gather_dependencies(estimator)
|
253
254
|
deps = deps | _gather_dependencies(base_estimator)
|
@@ -274,6 +275,15 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
274
275
|
self.set_drop_input_cols(drop_input_cols)
|
275
276
|
self.set_sample_weight_col(sample_weight_col)
|
276
277
|
|
278
|
+
def _get_rand_id(self) -> str:
|
279
|
+
"""
|
280
|
+
Generate random id to be used in sproc and stage names.
|
281
|
+
|
282
|
+
Returns:
|
283
|
+
Random id string usable in sproc, table, and stage names.
|
284
|
+
"""
|
285
|
+
return str(uuid4()).replace("-", "_").upper()
|
286
|
+
|
277
287
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
278
288
|
"""
|
279
289
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -352,7 +362,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
352
362
|
cp.dump(self._sklearn_object, local_transform_file)
|
353
363
|
|
354
364
|
# Create temp stage to run fit.
|
355
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
365
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
356
366
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
357
367
|
SqlResultValidator(
|
358
368
|
session=session,
|
@@ -365,11 +375,12 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
365
375
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
366
376
|
).validate()
|
367
377
|
|
368
|
-
|
378
|
+
# Use posixpath to construct stage paths
|
379
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
380
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
369
381
|
local_result_file_name = get_temp_file_path()
|
370
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
371
382
|
|
372
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
383
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
373
384
|
statement_params = telemetry.get_function_usage_statement_params(
|
374
385
|
project=_PROJECT,
|
375
386
|
subproject=_SUBPROJECT,
|
@@ -395,6 +406,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
395
406
|
replace=True,
|
396
407
|
session=session,
|
397
408
|
statement_params=statement_params,
|
409
|
+
anonymous=True
|
398
410
|
)
|
399
411
|
def fit_wrapper_sproc(
|
400
412
|
session: Session,
|
@@ -403,7 +415,8 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
403
415
|
stage_result_file_name: str,
|
404
416
|
input_cols: List[str],
|
405
417
|
label_cols: List[str],
|
406
|
-
sample_weight_col: Optional[str]
|
418
|
+
sample_weight_col: Optional[str],
|
419
|
+
statement_params: Dict[str, str]
|
407
420
|
) -> str:
|
408
421
|
import cloudpickle as cp
|
409
422
|
import numpy as np
|
@@ -470,15 +483,15 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
470
483
|
api_calls=[Session.call],
|
471
484
|
custom_tags=dict([("autogen", True)]),
|
472
485
|
)
|
473
|
-
sproc_export_file_name =
|
474
|
-
|
486
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
487
|
+
session,
|
475
488
|
query,
|
476
489
|
stage_transform_file_name,
|
477
490
|
stage_result_file_name,
|
478
491
|
identifier.get_unescaped_names(self.input_cols),
|
479
492
|
identifier.get_unescaped_names(self.label_cols),
|
480
493
|
identifier.get_unescaped_names(self.sample_weight_col),
|
481
|
-
statement_params
|
494
|
+
statement_params,
|
482
495
|
)
|
483
496
|
|
484
497
|
if "|" in sproc_export_file_name:
|
@@ -488,7 +501,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
488
501
|
print("\n".join(fields[1:]))
|
489
502
|
|
490
503
|
session.file.get(
|
491
|
-
|
504
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
492
505
|
local_result_file_name,
|
493
506
|
statement_params=statement_params
|
494
507
|
)
|
@@ -534,7 +547,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
534
547
|
|
535
548
|
# Register vectorized UDF for batch inference
|
536
549
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
537
|
-
safe_id=self.
|
550
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
538
551
|
|
539
552
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
540
553
|
# will try to pickle all of self which fails.
|
@@ -626,7 +639,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
626
639
|
return transformed_pandas_df.to_dict("records")
|
627
640
|
|
628
641
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
629
|
-
safe_id=self.
|
642
|
+
safe_id=self._get_rand_id()
|
630
643
|
)
|
631
644
|
|
632
645
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -793,11 +806,18 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
793
806
|
Transformed dataset.
|
794
807
|
"""
|
795
808
|
if isinstance(dataset, DataFrame):
|
809
|
+
expected_type_inferred = ""
|
810
|
+
# when it is classifier, infer the datatype from label columns
|
811
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
812
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
813
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
814
|
+
)
|
815
|
+
|
796
816
|
output_df = self._batch_inference(
|
797
817
|
dataset=dataset,
|
798
818
|
inference_method="predict",
|
799
819
|
expected_output_cols_list=self.output_cols,
|
800
|
-
expected_output_cols_type=
|
820
|
+
expected_output_cols_type=expected_type_inferred,
|
801
821
|
)
|
802
822
|
elif isinstance(dataset, pd.DataFrame):
|
803
823
|
output_df = self._sklearn_inference(
|
@@ -868,10 +888,10 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
868
888
|
|
869
889
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
870
890
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc. functions.
|
871
|
-
Returns
|
891
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
872
892
|
"""
|
873
893
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
874
|
-
return []
|
894
|
+
return [output_cols_prefix]
|
875
895
|
|
876
896
|
classes = self._sklearn_object.classes_
|
877
897
|
if isinstance(classes, numpy.ndarray):
|
@@ -1100,7 +1120,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1100
1120
|
cp.dump(self._sklearn_object, local_score_file)
|
1101
1121
|
|
1102
1122
|
# Create temp stage to run score.
|
1103
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1123
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1104
1124
|
session = dataset._session
|
1105
1125
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1106
1126
|
SqlResultValidator(
|
@@ -1114,8 +1134,9 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1114
1134
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1115
1135
|
).validate()
|
1116
1136
|
|
1117
|
-
|
1118
|
-
|
1137
|
+
# Use posixpath to construct stage paths
|
1138
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1139
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1119
1140
|
statement_params = telemetry.get_function_usage_statement_params(
|
1120
1141
|
project=_PROJECT,
|
1121
1142
|
subproject=_SUBPROJECT,
|
@@ -1141,6 +1162,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1141
1162
|
replace=True,
|
1142
1163
|
session=session,
|
1143
1164
|
statement_params=statement_params,
|
1165
|
+
anonymous=True
|
1144
1166
|
)
|
1145
1167
|
def score_wrapper_sproc(
|
1146
1168
|
session: Session,
|
@@ -1148,7 +1170,8 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1148
1170
|
stage_score_file_name: str,
|
1149
1171
|
input_cols: List[str],
|
1150
1172
|
label_cols: List[str],
|
1151
|
-
sample_weight_col: Optional[str]
|
1173
|
+
sample_weight_col: Optional[str],
|
1174
|
+
statement_params: Dict[str, str]
|
1152
1175
|
) -> float:
|
1153
1176
|
import cloudpickle as cp
|
1154
1177
|
import numpy as np
|
@@ -1198,14 +1221,14 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1198
1221
|
api_calls=[Session.call],
|
1199
1222
|
custom_tags=dict([("autogen", True)]),
|
1200
1223
|
)
|
1201
|
-
score =
|
1202
|
-
|
1224
|
+
score = score_wrapper_sproc(
|
1225
|
+
session,
|
1203
1226
|
query,
|
1204
1227
|
stage_score_file_name,
|
1205
1228
|
identifier.get_unescaped_names(self.input_cols),
|
1206
1229
|
identifier.get_unescaped_names(self.label_cols),
|
1207
1230
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1208
|
-
statement_params
|
1231
|
+
statement_params,
|
1209
1232
|
)
|
1210
1233
|
|
1211
1234
|
cleanup_temp_files([local_score_file_name])
|
@@ -1223,18 +1246,20 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1223
1246
|
if self._sklearn_object._estimator_type == 'classifier':
|
1224
1247
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1225
1248
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1226
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1249
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1250
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1227
1251
|
# For regressor, the type of predict is float64
|
1228
1252
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1229
1253
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1230
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1231
|
-
|
1254
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1255
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1232
1256
|
for prob_func in PROB_FUNCTIONS:
|
1233
1257
|
if hasattr(self, prob_func):
|
1234
1258
|
output_cols_prefix: str = f"{prob_func}_"
|
1235
1259
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1236
1260
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1237
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1261
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1262
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1238
1263
|
|
1239
1264
|
@property
|
1240
1265
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#
|
8
8
|
import inspect
|
9
9
|
import os
|
10
|
+
import posixpath
|
10
11
|
from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
|
11
12
|
from uuid import uuid4
|
12
13
|
|
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
|
|
27
28
|
from snowflake.snowpark import DataFrame, Session
|
28
29
|
from snowflake.snowpark.functions import pandas_udf, sproc
|
29
30
|
from snowflake.snowpark.types import PandasSeries
|
31
|
+
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
30
32
|
|
31
33
|
from snowflake.ml.model.model_signature import (
|
32
34
|
DataType,
|
@@ -224,7 +226,6 @@ class AffinityPropagation(BaseTransformer):
|
|
224
226
|
sample_weight_col: Optional[str] = None,
|
225
227
|
) -> None:
|
226
228
|
super().__init__()
|
227
|
-
self.id = str(uuid4()).replace("-", "_").upper()
|
228
229
|
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
229
230
|
|
230
231
|
self._deps = list(deps)
|
@@ -251,6 +252,15 @@ class AffinityPropagation(BaseTransformer):
|
|
251
252
|
self.set_drop_input_cols(drop_input_cols)
|
252
253
|
self.set_sample_weight_col(sample_weight_col)
|
253
254
|
|
255
|
+
def _get_rand_id(self) -> str:
|
256
|
+
"""
|
257
|
+
Generate random id to be used in sproc and stage names.
|
258
|
+
|
259
|
+
Returns:
|
260
|
+
Random id string usable in sproc, table, and stage names.
|
261
|
+
"""
|
262
|
+
return str(uuid4()).replace("-", "_").upper()
|
263
|
+
|
254
264
|
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
255
265
|
"""
|
256
266
|
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
@@ -329,7 +339,7 @@ class AffinityPropagation(BaseTransformer):
|
|
329
339
|
cp.dump(self._sklearn_object, local_transform_file)
|
330
340
|
|
331
341
|
# Create temp stage to run fit.
|
332
|
-
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.
|
342
|
+
transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
|
333
343
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
|
334
344
|
SqlResultValidator(
|
335
345
|
session=session,
|
@@ -342,11 +352,12 @@ class AffinityPropagation(BaseTransformer):
|
|
342
352
|
expected_value=f"Stage area {transform_stage_name} successfully created."
|
343
353
|
).validate()
|
344
354
|
|
345
|
-
|
355
|
+
# Use posixpath to construct stage paths
|
356
|
+
stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
357
|
+
stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
346
358
|
local_result_file_name = get_temp_file_path()
|
347
|
-
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
|
348
359
|
|
349
|
-
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.
|
360
|
+
fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
|
350
361
|
statement_params = telemetry.get_function_usage_statement_params(
|
351
362
|
project=_PROJECT,
|
352
363
|
subproject=_SUBPROJECT,
|
@@ -372,6 +383,7 @@ class AffinityPropagation(BaseTransformer):
|
|
372
383
|
replace=True,
|
373
384
|
session=session,
|
374
385
|
statement_params=statement_params,
|
386
|
+
anonymous=True
|
375
387
|
)
|
376
388
|
def fit_wrapper_sproc(
|
377
389
|
session: Session,
|
@@ -380,7 +392,8 @@ class AffinityPropagation(BaseTransformer):
|
|
380
392
|
stage_result_file_name: str,
|
381
393
|
input_cols: List[str],
|
382
394
|
label_cols: List[str],
|
383
|
-
sample_weight_col: Optional[str]
|
395
|
+
sample_weight_col: Optional[str],
|
396
|
+
statement_params: Dict[str, str]
|
384
397
|
) -> str:
|
385
398
|
import cloudpickle as cp
|
386
399
|
import numpy as np
|
@@ -447,15 +460,15 @@ class AffinityPropagation(BaseTransformer):
|
|
447
460
|
api_calls=[Session.call],
|
448
461
|
custom_tags=dict([("autogen", True)]),
|
449
462
|
)
|
450
|
-
sproc_export_file_name =
|
451
|
-
|
463
|
+
sproc_export_file_name = fit_wrapper_sproc(
|
464
|
+
session,
|
452
465
|
query,
|
453
466
|
stage_transform_file_name,
|
454
467
|
stage_result_file_name,
|
455
468
|
identifier.get_unescaped_names(self.input_cols),
|
456
469
|
identifier.get_unescaped_names(self.label_cols),
|
457
470
|
identifier.get_unescaped_names(self.sample_weight_col),
|
458
|
-
statement_params
|
471
|
+
statement_params,
|
459
472
|
)
|
460
473
|
|
461
474
|
if "|" in sproc_export_file_name:
|
@@ -465,7 +478,7 @@ class AffinityPropagation(BaseTransformer):
|
|
465
478
|
print("\n".join(fields[1:]))
|
466
479
|
|
467
480
|
session.file.get(
|
468
|
-
|
481
|
+
posixpath.join(stage_result_file_name, sproc_export_file_name),
|
469
482
|
local_result_file_name,
|
470
483
|
statement_params=statement_params
|
471
484
|
)
|
@@ -511,7 +524,7 @@ class AffinityPropagation(BaseTransformer):
|
|
511
524
|
|
512
525
|
# Register vectorized UDF for batch inference
|
513
526
|
batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
|
514
|
-
safe_id=self.
|
527
|
+
safe_id=self._get_rand_id(), method=inference_method)
|
515
528
|
|
516
529
|
# Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
|
517
530
|
# will try to pickle all of self which fails.
|
@@ -603,7 +616,7 @@ class AffinityPropagation(BaseTransformer):
|
|
603
616
|
return transformed_pandas_df.to_dict("records")
|
604
617
|
|
605
618
|
batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
|
606
|
-
safe_id=self.
|
619
|
+
safe_id=self._get_rand_id()
|
607
620
|
)
|
608
621
|
|
609
622
|
pass_through_columns = self._get_pass_through_columns(dataset)
|
@@ -770,11 +783,18 @@ class AffinityPropagation(BaseTransformer):
|
|
770
783
|
Transformed dataset.
|
771
784
|
"""
|
772
785
|
if isinstance(dataset, DataFrame):
|
786
|
+
expected_type_inferred = ""
|
787
|
+
# when it is classifier, infer the datatype from label columns
|
788
|
+
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
789
|
+
expected_type_inferred = convert_sp_to_sf_type(
|
790
|
+
self.model_signatures['predict'].outputs[0].as_snowpark_type()
|
791
|
+
)
|
792
|
+
|
773
793
|
output_df = self._batch_inference(
|
774
794
|
dataset=dataset,
|
775
795
|
inference_method="predict",
|
776
796
|
expected_output_cols_list=self.output_cols,
|
777
|
-
expected_output_cols_type=
|
797
|
+
expected_output_cols_type=expected_type_inferred,
|
778
798
|
)
|
779
799
|
elif isinstance(dataset, pd.DataFrame):
|
780
800
|
output_df = self._sklearn_inference(
|
@@ -845,10 +865,10 @@ class AffinityPropagation(BaseTransformer):
|
|
845
865
|
|
846
866
|
def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
|
847
867
|
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
848
|
-
Returns
|
868
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
849
869
|
"""
|
850
870
|
if getattr(self._sklearn_object, "classes_", None) is None:
|
851
|
-
return []
|
871
|
+
return [output_cols_prefix]
|
852
872
|
|
853
873
|
classes = self._sklearn_object.classes_
|
854
874
|
if isinstance(classes, numpy.ndarray):
|
@@ -1073,7 +1093,7 @@ class AffinityPropagation(BaseTransformer):
|
|
1073
1093
|
cp.dump(self._sklearn_object, local_score_file)
|
1074
1094
|
|
1075
1095
|
# Create temp stage to run score.
|
1076
|
-
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.
|
1096
|
+
score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1077
1097
|
session = dataset._session
|
1078
1098
|
stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
|
1079
1099
|
SqlResultValidator(
|
@@ -1087,8 +1107,9 @@ class AffinityPropagation(BaseTransformer):
|
|
1087
1107
|
expected_value=f"Stage area {score_stage_name} successfully created."
|
1088
1108
|
).validate()
|
1089
1109
|
|
1090
|
-
|
1091
|
-
|
1110
|
+
# Use posixpath to construct stage paths
|
1111
|
+
stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
|
1112
|
+
score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
|
1092
1113
|
statement_params = telemetry.get_function_usage_statement_params(
|
1093
1114
|
project=_PROJECT,
|
1094
1115
|
subproject=_SUBPROJECT,
|
@@ -1114,6 +1135,7 @@ class AffinityPropagation(BaseTransformer):
|
|
1114
1135
|
replace=True,
|
1115
1136
|
session=session,
|
1116
1137
|
statement_params=statement_params,
|
1138
|
+
anonymous=True
|
1117
1139
|
)
|
1118
1140
|
def score_wrapper_sproc(
|
1119
1141
|
session: Session,
|
@@ -1121,7 +1143,8 @@ class AffinityPropagation(BaseTransformer):
|
|
1121
1143
|
stage_score_file_name: str,
|
1122
1144
|
input_cols: List[str],
|
1123
1145
|
label_cols: List[str],
|
1124
|
-
sample_weight_col: Optional[str]
|
1146
|
+
sample_weight_col: Optional[str],
|
1147
|
+
statement_params: Dict[str, str]
|
1125
1148
|
) -> float:
|
1126
1149
|
import cloudpickle as cp
|
1127
1150
|
import numpy as np
|
@@ -1171,14 +1194,14 @@ class AffinityPropagation(BaseTransformer):
|
|
1171
1194
|
api_calls=[Session.call],
|
1172
1195
|
custom_tags=dict([("autogen", True)]),
|
1173
1196
|
)
|
1174
|
-
score =
|
1175
|
-
|
1197
|
+
score = score_wrapper_sproc(
|
1198
|
+
session,
|
1176
1199
|
query,
|
1177
1200
|
stage_score_file_name,
|
1178
1201
|
identifier.get_unescaped_names(self.input_cols),
|
1179
1202
|
identifier.get_unescaped_names(self.label_cols),
|
1180
1203
|
identifier.get_unescaped_names(self.sample_weight_col),
|
1181
|
-
statement_params
|
1204
|
+
statement_params,
|
1182
1205
|
)
|
1183
1206
|
|
1184
1207
|
cleanup_temp_files([local_score_file_name])
|
@@ -1196,18 +1219,20 @@ class AffinityPropagation(BaseTransformer):
|
|
1196
1219
|
if self._sklearn_object._estimator_type == 'classifier':
|
1197
1220
|
outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
|
1198
1221
|
outputs = _rename_features(outputs, self.output_cols) # rename the output columns
|
1199
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1222
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1223
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1200
1224
|
# For regressor, the type of predict is float64
|
1201
1225
|
elif self._sklearn_object._estimator_type == 'regressor':
|
1202
1226
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1203
|
-
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1204
|
-
|
1227
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
1228
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1205
1229
|
for prob_func in PROB_FUNCTIONS:
|
1206
1230
|
if hasattr(self, prob_func):
|
1207
1231
|
output_cols_prefix: str = f"{prob_func}_"
|
1208
1232
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1209
1233
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1210
|
-
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1234
|
+
self._model_signature_dict[prob_func] = ModelSignature(inputs,
|
1235
|
+
([] if self._drop_input_cols else inputs) + outputs)
|
1211
1236
|
|
1212
1237
|
@property
|
1213
1238
|
def model_signatures(self) -> Dict[str, ModelSignature]:
|