snowflake-ml-python 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +16 -13
- snowflake/ml/_internal/exceptions/modeling_error_messages.py +5 -1
- snowflake/ml/_internal/telemetry.py +19 -0
- snowflake/ml/feature_store/__init__.py +9 -0
- snowflake/ml/feature_store/entity.py +73 -0
- snowflake/ml/feature_store/feature_store.py +1657 -0
- snowflake/ml/feature_store/feature_view.py +459 -0
- snowflake/ml/model/_client/ops/model_ops.py +16 -38
- snowflake/ml/model/_client/sql/model.py +1 -7
- snowflake/ml/model/_client/sql/model_version.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +9 -1
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +12 -2
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +7 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +1 -6
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +0 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +10 -1
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +11 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +17 -3
- snowflake/ml/model/model_signature.py +72 -16
- snowflake/ml/model/type_hints.py +12 -0
- snowflake/ml/modeling/_internal/estimator_protocols.py +1 -41
- snowflake/ml/modeling/_internal/model_trainer_builder.py +13 -9
- snowflake/ml/modeling/_internal/{distributed_hpo_trainer.py → snowpark_implementations/distributed_hpo_trainer.py} +66 -96
- snowflake/ml/modeling/_internal/{snowpark_handlers.py → snowpark_implementations/snowpark_handlers.py} +9 -6
- snowflake/ml/modeling/_internal/{xgboost_external_memory_trainer.py → snowpark_implementations/xgboost_external_memory_trainer.py} +3 -1
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +19 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +19 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +19 -3
- snowflake/ml/modeling/cluster/birch.py +19 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +19 -3
- snowflake/ml/modeling/cluster/dbscan.py +19 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +19 -3
- snowflake/ml/modeling/cluster/k_means.py +19 -3
- snowflake/ml/modeling/cluster/mean_shift.py +19 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +19 -3
- snowflake/ml/modeling/cluster/optics.py +19 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +19 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +19 -3
- snowflake/ml/modeling/compose/column_transformer.py +19 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +19 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +19 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +19 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +19 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +19 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +19 -3
- snowflake/ml/modeling/covariance/oas.py +19 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +19 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +19 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +19 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +19 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +19 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/pca.py +19 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +19 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +19 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +19 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +19 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +19 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +19 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +19 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +19 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +19 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +19 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +19 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +19 -3
- snowflake/ml/modeling/impute/knn_imputer.py +19 -3
- snowflake/ml/modeling/impute/missing_indicator.py +19 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +19 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +19 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +19 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +19 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +19 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +19 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/lars.py +19 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +19 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +19 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +19 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +19 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +19 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/perceptron.py +19 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/ridge.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +19 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +19 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +19 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +19 -3
- snowflake/ml/modeling/manifold/isomap.py +19 -3
- snowflake/ml/modeling/manifold/mds.py +19 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +19 -3
- snowflake/ml/modeling/manifold/tsne.py +19 -3
- snowflake/ml/modeling/metrics/classification.py +5 -6
- snowflake/ml/modeling/metrics/metrics_utils.py +5 -3
- snowflake/ml/modeling/metrics/ranking.py +7 -3
- snowflake/ml/modeling/metrics/regression.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +19 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +19 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +3 -13
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +3 -13
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +19 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +19 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +19 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +19 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +19 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +19 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +19 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +19 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +19 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +19 -3
- snowflake/ml/modeling/preprocessing/polynomial_features.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +19 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +19 -3
- snowflake/ml/modeling/svm/linear_svc.py +19 -3
- snowflake/ml/modeling/svm/linear_svr.py +19 -3
- snowflake/ml/modeling/svm/nu_svc.py +19 -3
- snowflake/ml/modeling/svm/nu_svr.py +19 -3
- snowflake/ml/modeling/svm/svc.py +19 -3
- snowflake/ml/modeling/svm/svr.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +19 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgb_regressor.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +19 -3
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +19 -3
- snowflake/ml/registry/registry.py +2 -0
- snowflake/ml/version.py +1 -1
- snowflake_ml_python-1.2.2.dist-info/LICENSE.txt +202 -0
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/METADATA +276 -50
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/RECORD +204 -197
- {snowflake_ml_python-1.2.0.dist-info → snowflake_ml_python-1.2.2.dist-info}/WHEEL +2 -1
- snowflake_ml_python-1.2.2.dist-info/top_level.txt +1 -0
- /snowflake/ml/modeling/_internal/{pandas_trainer.py → local_implementations/pandas_trainer.py} +0 -0
- /snowflake/ml/modeling/_internal/{snowpark_trainer.py → snowpark_implementations/snowpark_trainer.py} +0 -0
snowflake/ml/model/_deploy_client/snowservice/deploy_options.py
CHANGED
@@ -1,6 +1,6 @@
 import inspect
 import logging
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
 
 from snowflake.ml._internal.exceptions import (
     error_codes,
@@ -16,6 +16,7 @@ class SnowServiceDeployOptions:
         self,
         compute_pool: str,
         *,
+        external_access_integrations: List[str],
         image_repo: Optional[str] = None,
         min_instances: Optional[int] = 1,
         max_instances: Optional[int] = 1,
@@ -34,7 +35,15 @@ class SnowServiceDeployOptions:
 
         Args:
             compute_pool: SnowService compute pool name. Please refer to official doc for how to create a
-                compute pool:
+                compute pool:
+                https://docs.snowflake.com/en/developer-guide/snowpark-container-services/working-with-compute-pool
+            external_access_integrations: External Access Integrations name used to build image and deploy the model.
+                Please refer to the doc for how to create an External Access Integrations: https://docs.snowflake.com/
+                developer-guide/snowpark-container-services/additional-considerations-services-jobs
+                #configuring-network-capabilities .
+                To make sure your image could be built, access to the following endpoint must be allowed.
+                docker.com:80, docker.com:443, anaconda.com:80, anaconda.com:443, anaconda.org:80, anaconda.org:443,
+                pypi.org:80, pypi.org:443
             image_repo: SnowService image repo path. e.g. "<image_registry>/<db>/<schema>/<repo>". Default to auto
                 inferred based on session information.
             min_instances: Minimum number of service replicas. Default to 1.
@@ -70,6 +79,7 @@ class SnowServiceDeployOptions:
         self.model_in_image = model_in_image
         self.debug_mode = debug_mode
         self.enable_ingress = enable_ingress
+        self.external_access_integrations = external_access_integrations
 
         if self.num_workers is None and self.use_gpu:
             logger.info("num_workers has been defaulted to 1 when using GPU.")
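The endpoints listed in the docstring above must be reachable through an External Access Integration before a deployment can build its image. One possible account-side setup, sketched in Python against an existing Snowpark session (object names are hypothetical; see the linked Snowflake docs for the authoritative syntax and required privileges):

    # Assumes `session` is an existing snowflake.snowpark.Session with sufficient privileges.
    setup_statements = [
        """
        CREATE OR REPLACE NETWORK RULE MY_BUILD_EGRESS_RULE
          TYPE = HOST_PORT
          MODE = EGRESS
          VALUE_LIST = ('docker.com:80', 'docker.com:443', 'anaconda.com:80', 'anaconda.com:443',
                        'anaconda.org:80', 'anaconda.org:443', 'pypi.org:80', 'pypi.org:443')
        """,
        """
        CREATE OR REPLACE EXTERNAL ACCESS INTEGRATION MY_EGRESS_EAI
          ALLOWED_NETWORK_RULES = (MY_BUILD_EGRESS_RULE)
          ENABLED = TRUE
        """,
    ]
    for statement in setup_statements:
        session.sql(statement).collect()

The resulting integration name (here MY_EGRESS_EAI) is what would be passed in the new, keyword-only external_access_integrations list.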
snowflake/ml/model/_deploy_client/utils/snowservice_client.py
CHANGED
@@ -2,7 +2,7 @@ import json
 import logging
 import textwrap
 import time
-from typing import Optional
+from typing import List, Optional
 
 from snowflake.ml._internal.exceptions import (
     error_codes,
@@ -36,6 +36,7 @@ class SnowServiceClient:
         service_name: str,
         compute_pool: str,
         spec_stage_location: str,
+        external_access_integrations: List[str],
         *,
         min_instances: Optional[int] = 1,
         max_instances: Optional[int] = 1,
@@ -48,6 +49,7 @@ class SnowServiceClient:
             service_name: Name of the service.
             min_instances: Minimum number of service replicas.
             max_instances: Maximum number of service replicas.
+            external_access_integrations: EAIs for network connection.
             compute_pool: Name of the compute pool.
             spec_stage_location: Stage path for the service spec.
         """
@@ -61,13 +63,14 @@ class SnowServiceClient:
             SPEC = '{path}'
             MIN_INSTANCES={min_instances}
             MAX_INSTANCES={max_instances}
+            EXTERNAL_ACCESS_INTEGRATIONS = ({', '.join(external_access_integrations)})
             """
         )
         logger.info(f"Creating service {service_name}")
         logger.debug(f"Create service with SQL: \n {sql}")
         self.session.sql(sql).collect()
 
-    def create_job(self, compute_pool: str, spec_stage_location: str) -> None:
+    def create_job(self, compute_pool: str, spec_stage_location: str, external_access_integrations: List[str]) -> None:
         """Execute the job creation SQL command. Note that the job creation is synchronous, hence we execute it in a
         async way so that we can query the log in the meantime.
 
@@ -76,7 +79,7 @@ class SnowServiceClient:
         Args:
             compute_pool: name of the compute pool
            spec_stage_location: path to the stage location where the spec is located at.
-
+            external_access_integrations: EAIs for network connection.
         """
         stage, path = uri.get_stage_and_path(spec_stage_location)
         sql = textwrap.dedent(
@@ -85,6 +88,7 @@ class SnowServiceClient:
             IN COMPUTE POOL {compute_pool}
             FROM {stage}
             SPEC = '{path}'
+            EXTERNAL_ACCESS_INTEGRATIONS = ({', '.join(external_access_integrations)})
             """
         )
         logger.debug(f"Create job with SQL: \n {sql}")
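For reference, the joined integration names surface as an EXTERNAL_ACCESS_INTEGRATIONS clause in the generated statements. A self-contained sketch of how that clause renders, mirroring the f-string pattern above (the service, pool, stage, and integration names are invented):

    import textwrap

    external_access_integrations = ["MY_EAI_PYPI", "MY_EAI_DOCKER"]  # hypothetical EAI names
    sql = textwrap.dedent(
        f"""
        CREATE SERVICE MY_SERVICE
            IN COMPUTE POOL MY_COMPUTE_POOL
            FROM @MY_STAGE
            SPEC = 'service_spec.yaml'
            MIN_INSTANCES=1
            MAX_INSTANCES=1
            EXTERNAL_ACCESS_INTEGRATIONS = ({', '.join(external_access_integrations)})
        """
    )
    print(sql)  # ... EXTERNAL_ACCESS_INTEGRATIONS = (MY_EAI_PYPI, MY_EAI_DOCKER)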
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py
CHANGED
@@ -4,7 +4,6 @@ from typing import Any, Dict, List, Optional, cast
 
 import yaml
 
-from snowflake.ml._internal.utils import snowflake_env
 from snowflake.ml.model import type_hints
 from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
 from snowflake.ml.model._model_composer.model_method import (
@@ -84,11 +83,7 @@ class ModelManifest:
             ],
         )
 
-        if (
-            snowflake_env.get_current_snowflake_version(session)
-            >= model_manifest_schema.MANIFEST_USER_DATA_ENABLE_VERSION
-        ):
-            manifest_dict["user_data"] = self.generate_user_data_with_client_data(model_meta)
+        manifest_dict["user_data"] = self.generate_user_data_with_client_data(model_meta)
 
         with (self.workspace_path / ModelManifest.MANIFEST_FILE_REL_PATH).open("w", encoding="utf-8") as f:
             # Anchors are not supported in the server, avoid that.
snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py
CHANGED
@@ -2,14 +2,12 @@
 
 from typing import Any, Dict, List, Literal, TypedDict
 
-from packaging import version
 from typing_extensions import NotRequired, Required
 
 from snowflake.ml.model import model_signature
 
 MODEL_MANIFEST_VERSION = "1.0"
 
-MANIFEST_USER_DATA_ENABLE_VERSION = version.parse("8.2.0")
 MANIFEST_CLIENT_DATA_KEY_NAME = "snowpark_ml_data"
 MANIFEST_CLIENT_DATA_SCHEMA_VERSION = "2024-02-01"
 
snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py
CHANGED
@@ -1 +1,10 @@
-REQUIREMENTS = [
+REQUIREMENTS = [
+    "absl-py>=0.15,<2",
+    "anyio>=3.5.0,<4",
+    "numpy>=1.23,<2",
+    "packaging>=20.9,<24",
+    "pandas>=1.0.0,<2",
+    "pyyaml>=6.0,<7",
+    "snowflake-snowpark-python>=1.8.0,<2",
+    "typing-extensions>=4.1.0,<5"
+]
snowflake/ml/model/_model_composer/model_runtime/model_runtime.py
CHANGED
@@ -62,7 +62,6 @@ class ModelRuntime:
                     model_env.ModelDependency(requirement=dep, pip_name=requirements.Requirement(dep).name)
                     for dep in _UDF_INFERENCE_DEPENDENCIES
                 ],
-                check_local_version=True,
             )
         else:
             self.runtime_env.include_if_absent(
@@ -70,7 +69,6 @@ class ModelRuntime:
                     model_env.ModelDependency(requirement=dep, pip_name=requirements.Requirement(dep).name)
                     for dep in _UDF_INFERENCE_DEPENDENCIES + [snowml_pkg_spec]
                 ],
-                check_local_version=True,
             )
 
     def save(self, workspace_path: pathlib.Path) -> model_manifest_schema.ModelRuntimeDict:
snowflake/ml/model/_packager/model_meta/_core_requirements.py
CHANGED
@@ -1 +1,11 @@
-REQUIREMENTS = [
+REQUIREMENTS = [
+    "absl-py>=0.15,<2",
+    "anyio>=3.5.0,<4",
+    "cloudpickle>=2.0.0",
+    "numpy>=1.23,<2",
+    "packaging>=20.9,<24",
+    "pandas>=1.0.0,<2",
+    "pyyaml>=6.0,<7",
+    "snowflake-snowpark-python>=1.8.0,<2",
+    "typing-extensions>=4.1.0,<5"
+]
snowflake/ml/model/_packager/model_meta/model_meta.py
CHANGED
@@ -18,6 +18,7 @@ from snowflake.ml.model import model_signature, type_hints as model_types
 from snowflake.ml.model._packager.model_env import model_env
 from snowflake.ml.model._packager.model_meta import (
     _core_requirements,
+    _packaging_requirements,
     model_blob_meta,
     model_meta_schema,
 )
@@ -26,7 +27,8 @@ from snowflake.ml.model._packager.model_meta_migrator import migrator_plans
 MODEL_METADATA_FILE = "model.yaml"
 MODEL_CODE_DIR = "code"
 
-_PACKAGING_CORE_DEPENDENCIES = _core_requirements.REQUIREMENTS
+_PACKAGING_CORE_DEPENDENCIES = _core_requirements.REQUIREMENTS  # Legacy Model only
+_PACKAGING_REQUIREMENTS = _packaging_requirements.REQUIREMENTS  # New Model only
 _SNOWFLAKE_PKG_NAME = "snowflake"
 _SNOWFLAKE_ML_PKG_NAME = f"{_SNOWFLAKE_PKG_NAME}.ml"
 
@@ -73,6 +75,8 @@ def create_model_metadata(
     model_dir_path = os.path.normpath(model_dir_path)
     embed_local_ml_library = kwargs.pop("embed_local_ml_library", False)
     legacy_save = kwargs.pop("_legacy_save", False)
+    relax_version = kwargs.pop("relax_version", False)
+
     if embed_local_ml_library:
         # Use the last one which is loaded first, that is mean, it is loaded from site-packages.
         # We could make sure that user does not overwrite our library with their code follow the same naming.
@@ -94,6 +98,8 @@ def create_model_metadata(
         pip_requirements=pip_requirements,
         python_version=python_version,
         embed_local_ml_library=embed_local_ml_library,
+        legacy_save=legacy_save,
+        relax_version=relax_version,
     )
 
     if embed_local_ml_library:
@@ -146,6 +152,8 @@ def _create_env_for_model_metadata(
     pip_requirements: Optional[List[str]] = None,
     python_version: Optional[str] = None,
     embed_local_ml_library: bool = False,
+    legacy_save: bool = False,
+    relax_version: bool = False,
 ) -> model_env.ModelEnv:
     env = model_env.ModelEnv()
 
@@ -154,11 +162,14 @@
     env.pip_requirements = pip_requirements  # type: ignore[assignment]
     env.python_version = python_version  # type: ignore[assignment]
     env.snowpark_ml_version = snowml_env.VERSION
+
+    requirements_to_add = _PACKAGING_CORE_DEPENDENCIES if legacy_save else _PACKAGING_REQUIREMENTS
+
     if embed_local_ml_library:
         env.include_if_absent(
             [
                 model_env.ModelDependency(requirement=dep, pip_name=requirements.Requirement(dep).name)
-                for dep in
+                for dep in requirements_to_add
             ],
             check_local_version=True,
         )
@@ -166,11 +177,14 @@
         env.include_if_absent(
             [
                 model_env.ModelDependency(requirement=dep, pip_name=requirements.Requirement(dep).name)
-                for dep in
+                for dep in requirements_to_add + [env_utils.SNOWPARK_ML_PKG_NAME]
             ],
             check_local_version=True,
         )
 
+    if relax_version:
+        env.relax_version()
+
     return env
 
 
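The relax_version flag only triggers env.relax_version(), whose implementation is not part of this diff; the documented effect is that exact pins of the form ==x.y.z are widened to >=x.y, <(x+1). A small illustrative sketch of that rule using the packaging library (relax_pin is a hypothetical helper, not the package's own code):

    from packaging.requirements import Requirement
    from packaging.version import Version

    def relax_pin(req_str: str) -> str:
        # Illustrative only: applies the documented rule that an exact pin ==x.y.z
        # becomes the range >=x.y, <(x+1); everything else passes through unchanged.
        req = Requirement(req_str)
        for spec in req.specifier:
            if spec.operator == "==":
                v = Version(spec.version)
                return f"{req.name}>={v.major}.{v.minor},<{v.major + 1}"
        return req_str

    print(relax_pin("xgboost==1.7.3"))  # xgboost>=1.7,<2
    print(relax_pin("numpy>=1.23,<2"))  # unchanged: numpy>=1.23,<2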
snowflake/ml/model/model_signature.py
CHANGED
@@ -1,6 +1,18 @@
 import enum
+import json
 import warnings
-from typing import
+from typing import (
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    Union,
+    cast,
+)
 
 import numpy as np
 import pandas as pd
@@ -337,6 +349,31 @@ class SnowparkIdentifierRule(enum.Enum):
             assert_never(self)
 
 
+def _get_dataframe_values_range(
+    df: snowflake.snowpark.DataFrame,
+) -> Dict[str, Union[Tuple[int, int], Tuple[float, float]]]:
+    columns = [
+        F.array_construct(F.min(field.name), F.max(field.name)).as_(field.name)
+        for field in df.schema.fields
+        if isinstance(field.datatype, spt._NumericType)
+    ]
+    if not columns:
+        return {}
+    res = df.select(columns).collect()
+    if len(res) != 1:
+        raise snowml_exceptions.SnowflakeMLException(
+            error_code=error_codes.INTERNAL_SNOWML_ERROR,
+            original_exception=ValueError(f"Unable to get the value range of fields {df.columns}"),
+        )
+    return cast(
+        Dict[str, Union[Tuple[int, int], Tuple[float, float]]],
+        {
+            sql_identifier.SqlIdentifier(k, case_sensitive=True).identifier(): (json.loads(v)[0], json.loads(v)[1])
+            for k, v in res[0].as_dict().items()
+        },
+    )
+
+
 def _validate_snowpark_data(
     data: snowflake.snowpark.DataFrame, features: Sequence[core.BaseFeatureSpec]
 ) -> SnowparkIdentifierRule:
@@ -361,6 +398,7 @@ def _validate_snowpark_data(
         SnowparkIdentifierRule.NORMALIZED: [],
     }
     schema = data.schema
+    values_range = _get_dataframe_values_range(data)
     for identifier_rule in errors.keys():
         for feature in features:
             try:
@@ -401,8 +439,11 @@
                             + f"Feature is a scalar feature, while {field.name} is not."
                         ),
                     )
+                    continue
                 try:
-                    _validate_snowpark_type_feature(
+                    _validate_snowpark_type_feature(
+                        data, field, ft_type, feature.name, values_range.get(field.name, None)
+                    )
                 except snowml_exceptions.SnowflakeMLException as e:
                     errors[identifier_rule].append(e.original_exception)
                     break
@@ -433,17 +474,12 @@ If using the inferred names from model signatures, there are the following error
 
 
 def _validate_snowpark_type_feature(
-    df: snowflake.snowpark.DataFrame,
+    df: snowflake.snowpark.DataFrame,
+    field: spt.StructField,
+    ft_type: DataType,
+    ft_name: str,
+    value_range: Optional[Union[Tuple[int, int], Tuple[float, float]]],
 ) -> None:
-    def get_value_range(field_name: str) -> Tuple[int, int]:
-        res = df.select(F.min(field_name).as_("MIN"), F.max(field_name).as_("MAX")).collect()
-        if len(res) != 1:
-            raise snowml_exceptions.SnowflakeMLException(
-                error_code=error_codes.INTERNAL_SNOWML_ERROR,
-                original_exception=ValueError(f"Unable to get the value range of field {field_name}"),
-            )
-        return res[0].MIN, res[0].MAX
-
     field_data_type = field.datatype
     col_name = identifier.get_unescaped_names(field.name)
 
@@ -465,16 +501,27 @@ def _validate_snowpark_type_feature(
                 error_code=error_codes.INVALID_DATA,
                 original_exception=ValueError(
                     f"Data Validation Error in feature {ft_name}: "
-
+                    f"Feature type {ft_type} is not met by column {col_name} "
+                    f"because of its original type {field_data_type}"
+                ),
+            )
+        if value_range is None:
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.INVALID_DATA,
+                original_exception=ValueError(
+                    f"Data Validation Error in feature {ft_name}: "
+                    f"Feature type {ft_type} is not met by column {col_name} "
+                    f"because of its original type {field_data_type} is non-Numeric."
                 ),
             )
-        min_v, max_v =
+        min_v, max_v = value_range
         if max_v > np.iinfo(ft_type._numpy_type).max or min_v < np.iinfo(ft_type._numpy_type).min:
             raise snowml_exceptions.SnowflakeMLException(
                 error_code=error_codes.INVALID_DATA,
                 original_exception=ValueError(
                     f"Data Validation Error in feature {ft_name}: "
-
+                    f"Feature type {ft_type} is not met by column {col_name} "
+                    f"because it overflows with min"
                 ),
             )
         elif ft_type in [core.DataType.FLOAT, core.DataType.DOUBLE]:
@@ -494,7 +541,16 @@
                     + f"Feature type {ft_type} is not met by column {col_name}."
                 ),
             )
-
+        if value_range is None:
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.INVALID_DATA,
+                original_exception=ValueError(
+                    f"Data Validation Error in feature {ft_name}: "
+                    f"Feature type {ft_type} is not met by column {col_name} "
+                    f"because of its original type {field_data_type} is non-Numeric."
+                ),
+            )
+        min_v, max_v = value_range
         if (
             max_v > np.finfo(ft_type._numpy_type).max  # type: ignore[arg-type]
             or min_v < np.finfo(ft_type._numpy_type).min  # type: ignore[arg-type]
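The new _get_dataframe_values_range helper replaces the per-column MIN/MAX query of the removed get_value_range closure with a single scan that packs each numeric column's bounds into an array. A hedged sketch of that query pattern, assuming an existing Snowpark session and a hypothetical table:

    import json

    import snowflake.snowpark.functions as F

    # `session` is assumed to be an existing snowflake.snowpark.Session; the table
    # and column names are hypothetical.
    df = session.table("MY_FEATURE_TABLE")
    row = df.select(
        F.array_construct(F.min("AGE"), F.max("AGE")).as_("AGE"),
        F.array_construct(F.min("INCOME"), F.max("INCOME")).as_("INCOME"),
    ).collect()[0]
    # Each ARRAY value comes back as a JSON-encoded string, which is why the new
    # helper above decodes it with json.loads.
    ranges = {name: tuple(json.loads(value)) for name, value in row.as_dict().items()}
    print(ranges)  # e.g. {'AGE': (18, 90), 'INCOME': (1200.0, 250000.0)}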
snowflake/ml/model/type_hints.py
CHANGED
@@ -3,6 +3,7 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Dict,
+    List,
     Literal,
     Optional,
     Sequence,
@@ -173,6 +174,13 @@ class SnowparkContainerServiceDeployOptions(DeployOptions):
         debug_mode: When set to True, deployment artifacts will be persisted in a local temp directory.
         enable_ingress: When set to True, will expose HTTP endpoint for access to the predict method of the created
             service.
+        external_access_integrations: External Access Integrations name used to build image and deploy the model.
+            Please refer to the doc for how to create an External Access Integrations: https://docs.snowflake.com/
+            developer-guide/snowpark-container-services/additional-considerations-services-jobs
+            #configuring-network-capabilities .
+            To make sure your image could be built, access to the following endpoint must be allowed.
+            docker.com:80, docker.com:443, anaconda.com:80, anaconda.com:443, anaconda.org:80, anaconda.org:443,
+            pypi.org:80, pypi.org:443
     """
 
     compute_pool: str
@@ -187,6 +195,7 @@ class SnowparkContainerServiceDeployOptions(DeployOptions):
     model_in_image: NotRequired[bool]
     debug_mode: NotRequired[bool]
     enable_ingress: NotRequired[bool]
+    external_access_integrations: List[str]
 
 
 class ModelMethodSaveOptions(TypedDict):
@@ -198,9 +207,12 @@ class BaseModelSaveOption(TypedDict):
     """Options for saving the model.
 
     embed_local_ml_library: Embedding local SnowML into the code directory of the folder.
+    relax_version: Whether or not relax the version constraints of the dependencies if unresolvable. It detects any
+        ==x.y.z in specifiers and replaced with >=x.y, <(x+1). Defaults to False.
     """
 
     embed_local_ml_library: NotRequired[bool]
+    relax_version: NotRequired[bool]
     _legacy_save: NotRequired[bool]
     method_options: NotRequired[Dict[str, ModelMethodSaveOptions]]
 
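Because SnowparkContainerServiceDeployOptions is a TypedDict, the new required key is simply supplied in the plain options mapping handed to deployment. A minimal sketch with hypothetical resource names:

    from snowflake.ml.model.type_hints import SnowparkContainerServiceDeployOptions

    # The compute pool and External Access Integration must already exist in the account.
    deploy_options: SnowparkContainerServiceDeployOptions = {
        "compute_pool": "MY_COMPUTE_POOL",
        "external_access_integrations": ["MY_EGRESS_EAI"],
        "enable_ingress": True,
    }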
snowflake/ml/modeling/_internal/estimator_protocols.py
CHANGED
@@ -6,47 +6,7 @@ from snowflake.snowpark import DataFrame, Session
 
 
 # TODO: Add more specific entities to type hint estimators instead of using `object`.
-class
-    def batch_inference(
-        self,
-        dataset: DataFrame,
-        session: Session,
-        estimator: object,
-        dependencies: List[str],
-        inference_method: str,
-        input_cols: List[str],
-        pass_through_columns: List[str],
-        expected_output_cols_list: List[str],
-        expected_output_cols_type: str = "",
-    ) -> DataFrame:
-        raise NotImplementedError
-
-    def score_pandas(
-        self,
-        dataset: pd.DataFrame,
-        estimator: object,
-        input_cols: List[str],
-        label_cols: List[str],
-        sample_weight_col: Optional[str],
-    ) -> float:
-        raise NotImplementedError
-
-    def score_snowpark(
-        self,
-        dataset: DataFrame,
-        session: Session,
-        estimator: object,
-        dependencies: List[str],
-        score_sproc_imports: List[str],
-        input_cols: List[str],
-        label_cols: List[str],
-        sample_weight_col: Optional[str],
-    ) -> float:
-        raise NotImplementedError
-
-
-# TODO: Add more specific entities to type hint estimators instead of using `object`.
-class CVHandlers(Protocol):
+class TransformerHandlers(Protocol):
     def batch_inference(
         self,
         dataset: DataFrame,
snowflake/ml/modeling/_internal/model_trainer_builder.py
CHANGED
@@ -4,17 +4,21 @@ import pandas as pd
 from sklearn import model_selection
 
 from snowflake.ml._internal.exceptions import error_codes, exceptions
-from snowflake.ml.modeling._internal.distributed_hpo_trainer import (
-    DistributedHPOTrainer,
-)
 from snowflake.ml.modeling._internal.estimator_utils import (
     get_module_name,
     is_single_node,
 )
+from snowflake.ml.modeling._internal.local_implementations.pandas_trainer import (
+    PandasModelTrainer,
+)
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
-from snowflake.ml.modeling._internal.
-
-
+from snowflake.ml.modeling._internal.snowpark_implementations.distributed_hpo_trainer import (
+    DistributedHPOTrainer,
+)
+from snowflake.ml.modeling._internal.snowpark_implementations.snowpark_trainer import (
+    SnowparkModelTrainer,
+)
+from snowflake.ml.modeling._internal.snowpark_implementations.xgboost_external_memory_trainer import (
     XGBoostExternalMemoryTrainer,
 )
 from snowflake.snowpark import DataFrame, Session
@@ -76,9 +80,9 @@ class ModelTrainerBuilder:
         batch_size: int = -1,
     ) -> ModelTrainer:
         """
-        Builder method that creates an
+        Builder method that creates an appropriate ModelTrainer instance based on the given params.
         """
-        assert input_cols is not None  # Make MyPy
+        assert input_cols is not None  # Make MyPy happy
         if isinstance(dataset, pd.DataFrame):
             return PandasModelTrainer(
                 estimator=estimator,
@@ -100,7 +104,7 @@ class ModelTrainerBuilder:
             "subproject": subproject,
         }
 
-        assert dataset._session is not None  # Make MyPy
+        assert dataset._session is not None  # Make MyPy happy
         if isinstance(estimator, model_selection.GridSearchCV) or isinstance(
             estimator, model_selection.RandomizedSearchCV
         ):
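Condensed, the builder dispatch shown above picks a trainer from the dataset and estimator types. The sketch below restates only that selection; the single-node check and the XGBoost external-memory path are elided, and pick_trainer_kind is a hypothetical helper, not part of the package:

    import pandas as pd
    from sklearn import model_selection

    def pick_trainer_kind(dataset: object, estimator: object) -> str:
        # Local pandas data trains in-process; Snowpark data goes to a Snowpark-backed trainer,
        # with search estimators routed to the distributed HPO implementation.
        if isinstance(dataset, pd.DataFrame):
            return "PandasModelTrainer"
        if isinstance(estimator, (model_selection.GridSearchCV, model_selection.RandomizedSearchCV)):
            return "DistributedHPOTrainer"
        return "SnowparkModelTrainer"

    print(pick_trainer_kind(pd.DataFrame({"x": [1, 2]}), object()))  # PandasModelTrainer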