snowflake-ml-python 1.15.0__py3-none-any.whl → 1.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/platform_capabilities.py +4 -0
- snowflake/ml/_internal/utils/mixins.py +24 -9
- snowflake/ml/experiment/experiment_tracking.py +63 -19
- snowflake/ml/jobs/_utils/spec_utils.py +49 -11
- snowflake/ml/jobs/manager.py +20 -0
- snowflake/ml/model/__init__.py +16 -2
- snowflake/ml/model/_client/model/batch_inference_specs.py +18 -2
- snowflake/ml/model/_client/model/model_version_impl.py +5 -0
- snowflake/ml/model/_client/ops/service_ops.py +50 -5
- snowflake/ml/model/_client/service/model_deployment_spec.py +1 -1
- snowflake/ml/model/_client/sql/stage.py +8 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/model_method.py +25 -2
- snowflake/ml/model/_packager/model_env/model_env.py +26 -16
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/type_hints.py +13 -0
- snowflake/ml/model/volatility.py +34 -0
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +1 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +1 -1
- snowflake/ml/modeling/cluster/birch.py +1 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +1 -1
- snowflake/ml/modeling/cluster/dbscan.py +1 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +1 -1
- snowflake/ml/modeling/cluster/k_means.py +1 -1
- snowflake/ml/modeling/cluster/mean_shift.py +1 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +1 -1
- snowflake/ml/modeling/cluster/optics.py +1 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +1 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +1 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +1 -1
- snowflake/ml/modeling/compose/column_transformer.py +1 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +1 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +1 -1
- snowflake/ml/modeling/covariance/oas.py +1 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +1 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +1 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +1 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +1 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +1 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +1 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +1 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +1 -1
- snowflake/ml/modeling/decomposition/pca.py +1 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +1 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +1 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +1 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +1 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +1 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +1 -1
- snowflake/ml/modeling/impute/knn_imputer.py +1 -1
- snowflake/ml/modeling/impute/missing_indicator.py +1 -1
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +1 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +1 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +1 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +1 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +1 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/lars.py +1 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/perceptron.py +1 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ridge.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
- snowflake/ml/modeling/manifold/isomap.py +1 -1
- snowflake/ml/modeling/manifold/mds.py +1 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +1 -1
- snowflake/ml/modeling/manifold/tsne.py +1 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +1 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +1 -1
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +1 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +1 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
- snowflake/ml/modeling/preprocessing/polynomial_features.py +1 -1
- snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
- snowflake/ml/modeling/svm/linear_svc.py +1 -1
- snowflake/ml/modeling/svm/linear_svr.py +1 -1
- snowflake/ml/modeling/svm/nu_svc.py +1 -1
- snowflake/ml/modeling/svm/nu_svr.py +1 -1
- snowflake/ml/modeling/svm/svc.py +1 -1
- snowflake/ml/modeling/svm/svr.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
- snowflake/ml/registry/_manager/model_manager.py +1 -0
- snowflake/ml/registry/_manager/model_parameter_reconciler.py +27 -0
- snowflake/ml/registry/registry.py +15 -0
- snowflake/ml/utils/authentication.py +16 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.15.0.dist-info → snowflake_ml_python-1.16.0.dist-info}/METADATA +41 -3
- {snowflake_ml_python-1.15.0.dist-info → snowflake_ml_python-1.16.0.dist-info}/RECORD +178 -177
- {snowflake_ml_python-1.15.0.dist-info → snowflake_ml_python-1.16.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.15.0.dist-info → snowflake_ml_python-1.16.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.15.0.dist-info → snowflake_ml_python-1.16.0.dist-info}/top_level.txt +0 -0
|
@@ -17,6 +17,7 @@ logger = logging.getLogger(__name__)
|
|
|
17
17
|
|
|
18
18
|
LIVE_COMMIT_PARAMETER = "ENABLE_LIVE_VERSION_IN_SDK"
|
|
19
19
|
INLINE_DEPLOYMENT_SPEC_PARAMETER = "ENABLE_INLINE_DEPLOYMENT_SPEC_FROM_CLIENT_VERSION"
|
|
20
|
+
SET_MODULE_FUNCTIONS_VOLATILITY_FROM_MANIFEST = "SET_MODULE_FUNCTIONS_VOLATILITY_FROM_MANIFEST"
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class PlatformCapabilities:
|
|
@@ -73,6 +74,9 @@ class PlatformCapabilities:
|
|
|
73
74
|
def is_inlined_deployment_spec_enabled(self) -> bool:
|
|
74
75
|
return self._is_version_feature_enabled(INLINE_DEPLOYMENT_SPEC_PARAMETER)
|
|
75
76
|
|
|
77
|
+
def is_set_module_functions_volatility_from_manifest(self) -> bool:
|
|
78
|
+
return self._get_bool_feature(SET_MODULE_FUNCTIONS_VOLATILITY_FROM_MANIFEST, False)
|
|
79
|
+
|
|
76
80
|
def is_live_commit_enabled(self) -> bool:
|
|
77
81
|
return self._get_bool_feature(LIVE_COMMIT_PARAMETER, False)
|
|
78
82
|
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
1
2
|
from typing import Any, Optional
|
|
2
3
|
|
|
3
4
|
from snowflake.ml._internal.utils import identifier
|
|
@@ -16,6 +17,14 @@ def _identifiers_match(saved: Optional[str], current: Optional[str]) -> bool:
|
|
|
16
17
|
return saved_resolved == current_resolved
|
|
17
18
|
|
|
18
19
|
|
|
20
|
+
@dataclass(frozen=True)
|
|
21
|
+
class _SessionState:
|
|
22
|
+
account: Optional[str]
|
|
23
|
+
role: Optional[str]
|
|
24
|
+
database: Optional[str]
|
|
25
|
+
schema: Optional[str]
|
|
26
|
+
|
|
27
|
+
|
|
19
28
|
class SerializableSessionMixin:
|
|
20
29
|
"""Mixin that provides pickling capabilities for objects with Snowpark sessions."""
|
|
21
30
|
|
|
@@ -40,17 +49,23 @@ class SerializableSessionMixin:
|
|
|
40
49
|
|
|
41
50
|
def __setstate__(self, state: dict[str, Any]) -> None:
|
|
42
51
|
"""Restore session from context during unpickling."""
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
52
|
+
session_state = _SessionState(
|
|
53
|
+
account=state.pop(_SESSION_ACCOUNT_KEY, None),
|
|
54
|
+
role=state.pop(_SESSION_ROLE_KEY, None),
|
|
55
|
+
database=state.pop(_SESSION_DATABASE_KEY, None),
|
|
56
|
+
schema=state.pop(_SESSION_SCHEMA_KEY, None),
|
|
57
|
+
)
|
|
47
58
|
|
|
48
59
|
if hasattr(super(), "__setstate__"):
|
|
49
60
|
super().__setstate__(state) # type: ignore[misc]
|
|
50
61
|
else:
|
|
51
62
|
self.__dict__.update(state)
|
|
52
63
|
|
|
53
|
-
|
|
64
|
+
self._set_session(session_state)
|
|
65
|
+
|
|
66
|
+
def _set_session(self, session_state: _SessionState) -> None:
|
|
67
|
+
|
|
68
|
+
if session_state.account is not None:
|
|
54
69
|
active_sessions = snowpark_session._get_active_sessions()
|
|
55
70
|
if len(active_sessions) == 0:
|
|
56
71
|
raise RuntimeError("No active Snowpark session available. Please create a session.")
|
|
@@ -63,10 +78,10 @@ class SerializableSessionMixin:
|
|
|
63
78
|
active_sessions,
|
|
64
79
|
key=lambda s: sum(
|
|
65
80
|
(
|
|
66
|
-
_identifiers_match(
|
|
67
|
-
_identifiers_match(
|
|
68
|
-
_identifiers_match(
|
|
69
|
-
_identifiers_match(
|
|
81
|
+
_identifiers_match(session_state.account, s.get_current_account()),
|
|
82
|
+
_identifiers_match(session_state.role, s.get_current_role()),
|
|
83
|
+
_identifiers_match(session_state.database, s.get_current_database()),
|
|
84
|
+
_identifiers_match(session_state.schema, s.get_current_schema()),
|
|
70
85
|
)
|
|
71
86
|
),
|
|
72
87
|
),
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import functools
|
|
2
2
|
import json
|
|
3
3
|
import sys
|
|
4
|
-
from typing import Any, Optional, Union
|
|
4
|
+
from typing import Any, Callable, Concatenate, Optional, ParamSpec, TypeVar, Union
|
|
5
5
|
from urllib.parse import quote
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
from snowflake import snowpark
|
|
8
8
|
from snowflake.ml import model as ml_model, registry
|
|
9
9
|
from snowflake.ml._internal.human_readable_id import hrid_generator
|
|
10
10
|
from snowflake.ml._internal.utils import mixins, sql_identifier
|
|
@@ -18,20 +18,40 @@ from snowflake.ml.experiment._client import (
|
|
|
18
18
|
)
|
|
19
19
|
from snowflake.ml.model import type_hints
|
|
20
20
|
from snowflake.ml.utils import sql_client as sql_client_utils
|
|
21
|
-
from snowflake.snowpark import session
|
|
22
21
|
|
|
23
22
|
DEFAULT_EXPERIMENT_NAME = sql_identifier.SqlIdentifier("DEFAULT")
|
|
24
23
|
|
|
24
|
+
P = ParamSpec("P")
|
|
25
|
+
T = TypeVar("T")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _restore_session(
|
|
29
|
+
func: Callable[Concatenate["ExperimentTracking", P], T],
|
|
30
|
+
) -> Callable[Concatenate["ExperimentTracking", P], T]:
|
|
31
|
+
@functools.wraps(func)
|
|
32
|
+
def wrapper(self: "ExperimentTracking", /, *args: P.args, **kwargs: P.kwargs) -> T:
|
|
33
|
+
if self._session is None:
|
|
34
|
+
if self._session_state is None:
|
|
35
|
+
raise RuntimeError(
|
|
36
|
+
f"Session is not set before calling {func.__name__}, and there is no session state to restore from"
|
|
37
|
+
)
|
|
38
|
+
self._set_session(self._session_state)
|
|
39
|
+
if self._session is None:
|
|
40
|
+
raise RuntimeError(f"Failed to restore session before calling {func.__name__}")
|
|
41
|
+
return func(self, *args, **kwargs)
|
|
42
|
+
|
|
43
|
+
return wrapper
|
|
44
|
+
|
|
25
45
|
|
|
26
46
|
class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
27
47
|
"""
|
|
28
48
|
Class to manage experiments in Snowflake.
|
|
29
49
|
"""
|
|
30
50
|
|
|
31
|
-
@
|
|
51
|
+
@snowpark._internal.utils.private_preview(version="1.9.1")
|
|
32
52
|
def __init__(
|
|
33
53
|
self,
|
|
34
|
-
session:
|
|
54
|
+
session: snowpark.Session,
|
|
35
55
|
*,
|
|
36
56
|
database_name: Optional[str] = None,
|
|
37
57
|
schema_name: Optional[str] = None,
|
|
@@ -73,7 +93,10 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
73
93
|
database_name=self._database_name,
|
|
74
94
|
schema_name=self._schema_name,
|
|
75
95
|
)
|
|
76
|
-
self._session = session
|
|
96
|
+
self._session: Optional[snowpark.Session] = session
|
|
97
|
+
# Used to store information about the session if the session could not be restored during unpickling
|
|
98
|
+
# _session_state is None if and only if _session is not None
|
|
99
|
+
self._session_state: Optional[mixins._SessionState] = None
|
|
77
100
|
|
|
78
101
|
# The experiment in context
|
|
79
102
|
self._experiment: Optional[entities.Experiment] = None
|
|
@@ -87,20 +110,29 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
87
110
|
state["_registry"] = None
|
|
88
111
|
return state
|
|
89
112
|
|
|
90
|
-
def
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
session
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
113
|
+
def _set_session(self, session_state: mixins._SessionState) -> None:
|
|
114
|
+
try:
|
|
115
|
+
super()._set_session(session_state)
|
|
116
|
+
assert self._session is not None
|
|
117
|
+
except (snowpark.exceptions.SnowparkSessionException, AssertionError):
|
|
118
|
+
# If session was not set, store the session state
|
|
119
|
+
self._session = None
|
|
120
|
+
self._session_state = session_state
|
|
121
|
+
else:
|
|
122
|
+
# If session was set, clear the session state, and reinitialize the SQL client and registry
|
|
123
|
+
self._session_state = None
|
|
124
|
+
self._sql_client = sql_client.ExperimentTrackingSQLClient(
|
|
125
|
+
session=self._session,
|
|
126
|
+
database_name=self._database_name,
|
|
127
|
+
schema_name=self._schema_name,
|
|
128
|
+
)
|
|
129
|
+
self._registry = registry.Registry(
|
|
130
|
+
session=self._session,
|
|
131
|
+
database_name=self._database_name,
|
|
132
|
+
schema_name=self._schema_name,
|
|
133
|
+
)
|
|
103
134
|
|
|
135
|
+
@_restore_session
|
|
104
136
|
def set_experiment(
|
|
105
137
|
self,
|
|
106
138
|
experiment_name: str,
|
|
@@ -125,6 +157,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
125
157
|
self._run = None
|
|
126
158
|
return self._experiment
|
|
127
159
|
|
|
160
|
+
@_restore_session
|
|
128
161
|
def delete_experiment(
|
|
129
162
|
self,
|
|
130
163
|
experiment_name: str,
|
|
@@ -141,8 +174,10 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
141
174
|
self._run = None
|
|
142
175
|
|
|
143
176
|
@functools.wraps(registry.Registry.log_model)
|
|
177
|
+
@_restore_session
|
|
144
178
|
def log_model(
|
|
145
179
|
self,
|
|
180
|
+
/, # self needs to be a positional argument to stop mypy from complaining
|
|
146
181
|
model: Union[type_hints.SupportedModelType, ml_model.ModelVersion],
|
|
147
182
|
*,
|
|
148
183
|
model_name: str,
|
|
@@ -152,6 +187,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
152
187
|
with experiment_info.ExperimentInfoPatcher(experiment_info=run._get_experiment_info()):
|
|
153
188
|
return self._registry.log_model(model, model_name=model_name, **kwargs)
|
|
154
189
|
|
|
190
|
+
@_restore_session
|
|
155
191
|
def start_run(
|
|
156
192
|
self,
|
|
157
193
|
run_name: Optional[str] = None,
|
|
@@ -181,6 +217,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
181
217
|
self._run = entities.Run(experiment_tracking=self, experiment_name=experiment.name, run_name=run_name)
|
|
182
218
|
return self._run
|
|
183
219
|
|
|
220
|
+
@_restore_session
|
|
184
221
|
def end_run(self, run_name: Optional[str] = None) -> None:
|
|
185
222
|
"""
|
|
186
223
|
End the current run if no run name is provided. Otherwise, the specified run is ended.
|
|
@@ -210,6 +247,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
210
247
|
self._run = None
|
|
211
248
|
self._print_urls(experiment_name=experiment_name, run_name=run_name)
|
|
212
249
|
|
|
250
|
+
@_restore_session
|
|
213
251
|
def delete_run(
|
|
214
252
|
self,
|
|
215
253
|
run_name: str,
|
|
@@ -248,6 +286,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
248
286
|
"""
|
|
249
287
|
self.log_metrics(metrics={key: value}, step=step)
|
|
250
288
|
|
|
289
|
+
@_restore_session
|
|
251
290
|
def log_metrics(
|
|
252
291
|
self,
|
|
253
292
|
metrics: dict[str, float],
|
|
@@ -284,6 +323,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
284
323
|
"""
|
|
285
324
|
self.log_params({key: value})
|
|
286
325
|
|
|
326
|
+
@_restore_session
|
|
287
327
|
def log_params(
|
|
288
328
|
self,
|
|
289
329
|
params: dict[str, Any],
|
|
@@ -305,6 +345,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
305
345
|
params=json.dumps([param.to_dict() for param in params_list]),
|
|
306
346
|
)
|
|
307
347
|
|
|
348
|
+
@_restore_session
|
|
308
349
|
def log_artifact(
|
|
309
350
|
self,
|
|
310
351
|
local_path: str,
|
|
@@ -328,6 +369,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
328
369
|
file_path=file_path,
|
|
329
370
|
)
|
|
330
371
|
|
|
372
|
+
@_restore_session
|
|
331
373
|
def list_artifacts(
|
|
332
374
|
self,
|
|
333
375
|
run_name: str,
|
|
@@ -356,6 +398,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
356
398
|
artifact_path=artifact_path or "",
|
|
357
399
|
)
|
|
358
400
|
|
|
401
|
+
@_restore_session
|
|
359
402
|
def download_artifacts(
|
|
360
403
|
self,
|
|
361
404
|
run_name: str,
|
|
@@ -397,6 +440,7 @@ class ExperimentTracking(mixins.SerializableSessionMixin):
|
|
|
397
440
|
return self._run
|
|
398
441
|
return self.start_run()
|
|
399
442
|
|
|
443
|
+
@_restore_session
|
|
400
444
|
def _generate_run_name(self, experiment: entities.Experiment) -> sql_identifier.SqlIdentifier:
|
|
401
445
|
generator = hrid_generator.HRID16()
|
|
402
446
|
existing_runs = self._sql_client.show_runs_in_experiment(experiment_name=experiment.name)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
|
+
import re
|
|
3
4
|
import sys
|
|
4
5
|
from math import ceil
|
|
5
6
|
from pathlib import PurePath
|
|
@@ -10,6 +11,8 @@ from snowflake.ml._internal.utils import snowflake_env
|
|
|
10
11
|
from snowflake.ml.jobs._utils import constants, feature_flags, query_helper, types
|
|
11
12
|
from snowflake.ml.jobs._utils.runtime_env_utils import RuntimeEnvironmentsDict
|
|
12
13
|
|
|
14
|
+
_OCI_TAG_REGEX = re.compile("^[a-zA-Z0-9._-]{1,128}$")
|
|
15
|
+
|
|
13
16
|
|
|
14
17
|
def _get_node_resources(session: snowpark.Session, compute_pool: str) -> types.ComputeResources:
|
|
15
18
|
"""Extract resource information for the specified compute pool"""
|
|
@@ -56,22 +59,55 @@ def _get_runtime_image(session: snowpark.Session, target_hardware: Literal["CPU"
|
|
|
56
59
|
return selected_runtime.runtime_container_image if selected_runtime else None
|
|
57
60
|
|
|
58
61
|
|
|
59
|
-
def
|
|
62
|
+
def _check_image_tag_valid(tag: Optional[str]) -> bool:
|
|
63
|
+
if tag is None:
|
|
64
|
+
return False
|
|
65
|
+
|
|
66
|
+
return _OCI_TAG_REGEX.fullmatch(tag) is not None
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _get_image_spec(
|
|
70
|
+
session: snowpark.Session, compute_pool: str, runtime_environment: Optional[str] = None
|
|
71
|
+
) -> types.ImageSpec:
|
|
72
|
+
"""
|
|
73
|
+
Resolve image specification (container image and resources) for the job.
|
|
74
|
+
|
|
75
|
+
Behavior:
|
|
76
|
+
- If `runtime_environment` is empty or the feature flag is disabled, use the
|
|
77
|
+
default image tag and image name.
|
|
78
|
+
- If `runtime_environment` is a valid image tag, use that tag with the default
|
|
79
|
+
repository/name.
|
|
80
|
+
- If `runtime_environment` is a full image URL, use it directly.
|
|
81
|
+
- If the feature flag is enabled and `runtime_environment` is not provided,
|
|
82
|
+
select an ML Runtime image matching the local Python major.minor
|
|
83
|
+
- When multiple inputs are provided, `runtime_environment` takes priority.
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
session: Snowflake session.
|
|
87
|
+
compute_pool: Compute pool used to infer CPU/GPU resources.
|
|
88
|
+
runtime_environment: Optional image tag or full image URL to override.
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
Image spec including container image and resource requests/limits.
|
|
92
|
+
"""
|
|
60
93
|
# Retrieve compute pool node resources
|
|
61
94
|
resources = _get_node_resources(session, compute_pool=compute_pool)
|
|
95
|
+
hardware = "GPU" if resources.gpu > 0 else "CPU"
|
|
96
|
+
image_tag = _get_runtime_image_tag()
|
|
97
|
+
image_repo = constants.DEFAULT_IMAGE_REPO
|
|
98
|
+
image_name = constants.DEFAULT_IMAGE_GPU if resources.gpu > 0 else constants.DEFAULT_IMAGE_CPU
|
|
62
99
|
|
|
63
100
|
# Use MLRuntime image
|
|
64
|
-
hardware = "GPU" if resources.gpu > 0 else "CPU"
|
|
65
101
|
container_image = None
|
|
66
|
-
if
|
|
102
|
+
if runtime_environment:
|
|
103
|
+
if _check_image_tag_valid(runtime_environment):
|
|
104
|
+
image_tag = runtime_environment
|
|
105
|
+
else:
|
|
106
|
+
container_image = runtime_environment
|
|
107
|
+
elif feature_flags.FeatureFlags.ENABLE_IMAGE_VERSION_ENV_VAR.is_enabled():
|
|
67
108
|
container_image = _get_runtime_image(session, hardware) # type: ignore[arg-type]
|
|
68
109
|
|
|
69
|
-
|
|
70
|
-
image_repo = constants.DEFAULT_IMAGE_REPO
|
|
71
|
-
image_name = constants.DEFAULT_IMAGE_GPU if resources.gpu > 0 else constants.DEFAULT_IMAGE_CPU
|
|
72
|
-
image_tag = _get_runtime_image_tag()
|
|
73
|
-
container_image = f"{image_repo}/{image_name}:{image_tag}"
|
|
74
|
-
|
|
110
|
+
container_image = container_image or f"{image_repo}/{image_name}:{image_tag}"
|
|
75
111
|
# TODO: Should each instance consume the entire pod?
|
|
76
112
|
return types.ImageSpec(
|
|
77
113
|
resource_requests=resources,
|
|
@@ -127,6 +163,7 @@ def generate_service_spec(
|
|
|
127
163
|
target_instances: int = 1,
|
|
128
164
|
min_instances: int = 1,
|
|
129
165
|
enable_metrics: bool = False,
|
|
166
|
+
runtime_environment: Optional[str] = None,
|
|
130
167
|
) -> dict[str, Any]:
|
|
131
168
|
"""
|
|
132
169
|
Generate a service specification for a job.
|
|
@@ -139,11 +176,12 @@ def generate_service_spec(
|
|
|
139
176
|
target_instances: Number of instances for multi-node job
|
|
140
177
|
enable_metrics: Enable platform metrics for the job
|
|
141
178
|
min_instances: Minimum number of instances required to start the job
|
|
179
|
+
runtime_environment: The runtime image to use. Only support image tag or full image URL.
|
|
142
180
|
|
|
143
181
|
Returns:
|
|
144
182
|
Job service specification
|
|
145
183
|
"""
|
|
146
|
-
image_spec = _get_image_spec(session, compute_pool)
|
|
184
|
+
image_spec = _get_image_spec(session, compute_pool, runtime_environment)
|
|
147
185
|
|
|
148
186
|
# Set resource requests/limits, including nvidia.com/gpu quantity if applicable
|
|
149
187
|
resource_requests: dict[str, Union[str, int]] = {
|
|
@@ -317,7 +355,7 @@ def merge_patch(base: Any, patch: Any, display_name: str = "") -> Any:
|
|
|
317
355
|
Returns:
|
|
318
356
|
The patched object.
|
|
319
357
|
"""
|
|
320
|
-
if
|
|
358
|
+
if type(base) is not type(patch):
|
|
321
359
|
if base is not None:
|
|
322
360
|
logging.warning(f"Type mismatch while merging {display_name} (base={type(base)}, patch={type(patch)})")
|
|
323
361
|
return patch
|
snowflake/ml/jobs/manager.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
3
|
import pathlib
|
|
4
|
+
import sys
|
|
4
5
|
import textwrap
|
|
5
6
|
from pathlib import PurePath
|
|
6
7
|
from typing import Any, Callable, Optional, TypeVar, Union, cast, overload
|
|
@@ -344,6 +345,9 @@ def submit_from_stage(
|
|
|
344
345
|
query_warehouse (str): The query warehouse to use. Defaults to session warehouse.
|
|
345
346
|
spec_overrides (dict): A dictionary of overrides for the service spec.
|
|
346
347
|
imports (list[Union[tuple[str, str], tuple[str]]]): A list of additional payloads used in the job.
|
|
348
|
+
runtime_environment (str): The runtime image to use. Only support image tag or full image URL,
|
|
349
|
+
e.g. "1.7.1" or "image_repo/image_name:image_tag". When it refers to a full image URL,
|
|
350
|
+
it should contain image repository, image name and image tag.
|
|
347
351
|
|
|
348
352
|
Returns:
|
|
349
353
|
An object representing the submitted job.
|
|
@@ -409,6 +413,7 @@ def _submit_job(
|
|
|
409
413
|
"min_instances",
|
|
410
414
|
"enable_metrics",
|
|
411
415
|
"query_warehouse",
|
|
416
|
+
"runtime_environment",
|
|
412
417
|
],
|
|
413
418
|
)
|
|
414
419
|
def _submit_job(
|
|
@@ -459,6 +464,9 @@ def _submit_job(
|
|
|
459
464
|
)
|
|
460
465
|
imports = kwargs.pop("additional_payloads")
|
|
461
466
|
|
|
467
|
+
if "runtime_environment" in kwargs:
|
|
468
|
+
logger.warning("'runtime_environment' is in private preview since 1.15.0, do not use it in production.")
|
|
469
|
+
|
|
462
470
|
# Use kwargs for less common optional parameters
|
|
463
471
|
database = kwargs.pop("database", None)
|
|
464
472
|
schema = kwargs.pop("schema", None)
|
|
@@ -470,6 +478,7 @@ def _submit_job(
|
|
|
470
478
|
enable_metrics = kwargs.pop("enable_metrics", True)
|
|
471
479
|
query_warehouse = kwargs.pop("query_warehouse", session.get_current_warehouse())
|
|
472
480
|
imports = kwargs.pop("imports", None) or imports
|
|
481
|
+
runtime_environment = kwargs.pop("runtime_environment", None)
|
|
473
482
|
|
|
474
483
|
# Warn if there are unknown kwargs
|
|
475
484
|
if kwargs:
|
|
@@ -544,6 +553,7 @@ def _submit_job(
|
|
|
544
553
|
min_instances=min_instances,
|
|
545
554
|
enable_metrics=enable_metrics,
|
|
546
555
|
use_async=True,
|
|
556
|
+
runtime_environment=runtime_environment,
|
|
547
557
|
)
|
|
548
558
|
|
|
549
559
|
# Fall back to v1
|
|
@@ -556,6 +566,7 @@ def _submit_job(
|
|
|
556
566
|
target_instances=target_instances,
|
|
557
567
|
min_instances=min_instances,
|
|
558
568
|
enable_metrics=enable_metrics,
|
|
569
|
+
runtime_environment=runtime_environment,
|
|
559
570
|
)
|
|
560
571
|
|
|
561
572
|
# Generate spec overrides
|
|
@@ -639,6 +650,7 @@ def _do_submit_job_v2(
|
|
|
639
650
|
min_instances: int = 1,
|
|
640
651
|
enable_metrics: bool = True,
|
|
641
652
|
use_async: bool = True,
|
|
653
|
+
runtime_environment: Optional[str] = None,
|
|
642
654
|
) -> jb.MLJob[Any]:
|
|
643
655
|
"""
|
|
644
656
|
Generate the SQL query for job submission.
|
|
@@ -657,6 +669,7 @@ def _do_submit_job_v2(
|
|
|
657
669
|
min_instances: Minimum number of instances required to start the job.
|
|
658
670
|
enable_metrics: Whether to enable platform metrics for the job.
|
|
659
671
|
use_async: Whether to run the job asynchronously.
|
|
672
|
+
runtime_environment: image tag or full image URL to use for the job.
|
|
660
673
|
|
|
661
674
|
Returns:
|
|
662
675
|
The job object.
|
|
@@ -672,6 +685,13 @@ def _do_submit_job_v2(
|
|
|
672
685
|
"ENABLE_METRICS": enable_metrics,
|
|
673
686
|
"SPEC_OVERRIDES": spec_overrides,
|
|
674
687
|
}
|
|
688
|
+
# for the image tag or full image URL, we use that directly
|
|
689
|
+
if runtime_environment:
|
|
690
|
+
spec_options["RUNTIME"] = runtime_environment
|
|
691
|
+
elif feature_flags.FeatureFlags.ENABLE_IMAGE_VERSION_ENV_VAR.is_enabled():
|
|
692
|
+
# when feature flag is enabled, we get the local python version and wrap it in a dict
|
|
693
|
+
# in system function, we can know whether it is python version or image tag or full image URL through the format
|
|
694
|
+
spec_options["RUNTIME"] = json.dumps({"pythonVersion": f"{sys.version_info.major}.{sys.version_info.minor}"})
|
|
675
695
|
job_options = {
|
|
676
696
|
"EXTERNAL_ACCESS_INTEGRATIONS": external_access_integrations,
|
|
677
697
|
"QUERY_WAREHOUSE": query_warehouse,
|
snowflake/ml/model/__init__.py
CHANGED
|
@@ -1,6 +1,20 @@
|
|
|
1
|
-
from snowflake.ml.model._client.model.batch_inference_specs import JobSpec, OutputSpec
|
|
1
|
+
from snowflake.ml.model._client.model.batch_inference_specs import (
|
|
2
|
+
JobSpec,
|
|
3
|
+
OutputSpec,
|
|
4
|
+
SaveMode,
|
|
5
|
+
)
|
|
2
6
|
from snowflake.ml.model._client.model.model_impl import Model
|
|
3
7
|
from snowflake.ml.model._client.model.model_version_impl import ExportMode, ModelVersion
|
|
4
8
|
from snowflake.ml.model.models.huggingface_pipeline import HuggingFacePipelineModel
|
|
9
|
+
from snowflake.ml.model.volatility import Volatility
|
|
5
10
|
|
|
6
|
-
__all__ = ["Model", "ModelVersion", "ExportMode", "HuggingFacePipelineModel", "JobSpec", "OutputSpec"]
|
|
11
|
+
__all__ = [
|
|
12
|
+
"Model",
|
|
13
|
+
"ModelVersion",
|
|
14
|
+
"ExportMode",
|
|
15
|
+
"HuggingFacePipelineModel",
|
|
16
|
+
"JobSpec",
|
|
17
|
+
"OutputSpec",
|
|
18
|
+
"SaveMode",
|
|
19
|
+
"Volatility",
|
|
20
|
+
]
|
|
snowflake/ml/model/_client/model/batch_inference_specs.py
CHANGED
|
@@ -1,10 +1,26 @@
|
|
|
1
|
-
from typing import Optional, Union
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
from pydantic import BaseModel
|
|
4
5
|
|
|
5
6
|
|
|
7
|
+
class SaveMode(str, Enum):
|
|
8
|
+
"""Save mode options for batch inference output.
|
|
9
|
+
|
|
10
|
+
Determines the behavior when files already exist in the output location.
|
|
11
|
+
|
|
12
|
+
OVERWRITE: Remove existing files and write new results.
|
|
13
|
+
|
|
14
|
+
ERROR: Raise an error if files already exist in the output location.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
OVERWRITE = "overwrite"
|
|
18
|
+
ERROR = "error"
|
|
19
|
+
|
|
20
|
+
|
|
6
21
|
class OutputSpec(BaseModel):
|
|
7
22
|
stage_location: str
|
|
23
|
+
mode: SaveMode = SaveMode.ERROR
|
|
8
24
|
|
|
9
25
|
|
|
10
26
|
class JobSpec(BaseModel):
|
|
@@ -12,10 +28,10 @@ class JobSpec(BaseModel):
|
|
|
12
28
|
job_name: Optional[str] = None
|
|
13
29
|
num_workers: Optional[int] = None
|
|
14
30
|
function_name: Optional[str] = None
|
|
15
|
-
gpu: Optional[Union[str, int]] = None
|
|
16
31
|
force_rebuild: bool = False
|
|
17
32
|
max_batch_rows: int = 1024
|
|
18
33
|
warehouse: Optional[str] = None
|
|
19
34
|
cpu_requests: Optional[str] = None
|
|
20
35
|
memory_requests: Optional[str] = None
|
|
36
|
+
gpu_requests: Optional[str] = None
|
|
21
37
|
replicas: Optional[int] = None
|
|
snowflake/ml/model/_client/model/model_version_impl.py
CHANGED
|
@@ -551,6 +551,8 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
551
551
|
subproject=_TELEMETRY_SUBPROJECT,
|
|
552
552
|
func_params_to_log=[
|
|
553
553
|
"compute_pool",
|
|
554
|
+
"output_spec",
|
|
555
|
+
"job_spec",
|
|
554
556
|
],
|
|
555
557
|
)
|
|
556
558
|
def _run_batch(
|
|
@@ -579,6 +581,8 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
579
581
|
output_stage_location += "/"
|
|
580
582
|
input_stage_location = f"{output_stage_location}{_BATCH_INFERENCE_TEMPORARY_FOLDER}/"
|
|
581
583
|
|
|
584
|
+
self._service_ops._enforce_save_mode(output_spec.mode, output_stage_location)
|
|
585
|
+
|
|
582
586
|
try:
|
|
583
587
|
input_spec.write.copy_into_location(location=input_stage_location, file_format_type="parquet", header=True)
|
|
584
588
|
# todo: be specific about the type of errors to provide better error messages.
|
|
@@ -605,6 +609,7 @@ class ModelVersion(lineage_node.LineageNode):
|
|
|
605
609
|
warehouse=sql_identifier.SqlIdentifier(warehouse),
|
|
606
610
|
cpu_requests=job_spec.cpu_requests,
|
|
607
611
|
memory_requests=job_spec.memory_requests,
|
|
612
|
+
gpu_requests=job_spec.gpu_requests,
|
|
608
613
|
job_name=job_name,
|
|
609
614
|
replicas=job_spec.replicas,
|
|
610
615
|
# input and output
|
|
snowflake/ml/model/_client/ops/service_ops.py
CHANGED
|
@@ -7,6 +7,7 @@ import re
|
|
|
7
7
|
import tempfile
|
|
8
8
|
import threading
|
|
9
9
|
import time
|
|
10
|
+
import warnings
|
|
10
11
|
from typing import Any, Optional, Union, cast
|
|
11
12
|
|
|
12
13
|
from snowflake import snowpark
|
|
@@ -14,6 +15,7 @@ from snowflake.ml import jobs
|
|
|
14
15
|
from snowflake.ml._internal import file_utils, platform_capabilities as pc
|
|
15
16
|
from snowflake.ml._internal.utils import identifier, service_logger, sql_identifier
|
|
16
17
|
from snowflake.ml.model import inference_engine as inference_engine_module, type_hints
|
|
18
|
+
from snowflake.ml.model._client.model import batch_inference_specs
|
|
17
19
|
from snowflake.ml.model._client.service import model_deployment_spec
|
|
18
20
|
from snowflake.ml.model._client.sql import service as service_sql, stage as stage_sql
|
|
19
21
|
from snowflake.snowpark import async_job, exceptions, row, session
|
|
@@ -155,17 +157,17 @@ class ServiceOperator:
|
|
|
155
157
|
database_name=database_name,
|
|
156
158
|
schema_name=schema_name,
|
|
157
159
|
)
|
|
160
|
+
self._stage_client = stage_sql.StageSQLClient(
|
|
161
|
+
session,
|
|
162
|
+
database_name=database_name,
|
|
163
|
+
schema_name=schema_name,
|
|
164
|
+
)
|
|
158
165
|
self._use_inlined_deployment_spec = pc.PlatformCapabilities.get_instance().is_inlined_deployment_spec_enabled()
|
|
159
166
|
if self._use_inlined_deployment_spec:
|
|
160
167
|
self._workspace = None
|
|
161
168
|
self._model_deployment_spec = model_deployment_spec.ModelDeploymentSpec()
|
|
162
169
|
else:
|
|
163
170
|
self._workspace = tempfile.TemporaryDirectory()
|
|
164
|
-
self._stage_client = stage_sql.StageSQLClient(
|
|
165
|
-
session,
|
|
166
|
-
database_name=database_name,
|
|
167
|
-
schema_name=schema_name,
|
|
168
|
-
)
|
|
169
171
|
self._model_deployment_spec = model_deployment_spec.ModelDeploymentSpec(
|
|
170
172
|
workspace_path=pathlib.Path(self._workspace.name)
|
|
171
173
|
)
|
|
@@ -651,6 +653,47 @@ class ServiceOperator:
|
|
|
651
653
|
else:
|
|
652
654
|
module_logger.warning(f"Service {service.display_service_name} is done, but not transitioning.")
|
|
653
655
|
|
|
656
|
+
def _enforce_save_mode(self, output_mode: batch_inference_specs.SaveMode, output_stage_location: str) -> None:
|
|
657
|
+
"""Enforce the save mode for the output stage location.
|
|
658
|
+
|
|
659
|
+
Args:
|
|
660
|
+
output_mode: The output mode
|
|
661
|
+
output_stage_location: The output stage location to check/clean.
|
|
662
|
+
|
|
663
|
+
Raises:
|
|
664
|
+
FileExistsError: When ERROR mode is specified and files exist in the output location.
|
|
665
|
+
RuntimeError: When operations fail (checking files or removing files).
|
|
666
|
+
ValueError: When an invalid SaveMode is specified.
|
|
667
|
+
"""
|
|
668
|
+
list_results = self._stage_client.list_stage(output_stage_location)
|
|
669
|
+
|
|
670
|
+
if output_mode == batch_inference_specs.SaveMode.ERROR:
|
|
671
|
+
if len(list_results) > 0:
|
|
672
|
+
raise FileExistsError(
|
|
673
|
+
f"Output stage location '{output_stage_location}' is not empty. "
|
|
674
|
+
f"Found {len(list_results)} existing files. When using ERROR mode, the output location "
|
|
675
|
+
f"must be empty. Please clear the existing files or use OVERWRITE mode."
|
|
676
|
+
)
|
|
677
|
+
elif output_mode == batch_inference_specs.SaveMode.OVERWRITE:
|
|
678
|
+
if len(list_results) > 0:
|
|
679
|
+
warnings.warn(
|
|
680
|
+
f"Output stage location '{output_stage_location}' is not empty. "
|
|
681
|
+
f"Found {len(list_results)} existing files. OVERWRITE mode will remove all existing files "
|
|
682
|
+
f"in the output location before running the batch inference job.",
|
|
683
|
+
stacklevel=2,
|
|
684
|
+
)
|
|
685
|
+
try:
|
|
686
|
+
self._session.sql(f"REMOVE {output_stage_location}").collect()
|
|
687
|
+
except Exception as e:
|
|
688
|
+
raise RuntimeError(
|
|
689
|
+
f"OVERWRITE was specified. However, failed to remove existing files in output stage "
|
|
690
|
+
f"{output_stage_location}: {e}. Please clear up the existing files manually and retry "
|
|
691
|
+
f"the operation."
|
|
692
|
+
)
|
|
693
|
+
else:
|
|
694
|
+
valid_modes = list(batch_inference_specs.SaveMode)
|
|
695
|
+
raise ValueError(f"Invalid SaveMode: {output_mode}. Must be one of {valid_modes}")
|
|
696
|
+
|
|
654
697
|
def _stream_service_logs(
|
|
655
698
|
self,
|
|
656
699
|
async_job: snowpark.AsyncJob,
|
|
@@ -927,6 +970,7 @@ class ServiceOperator:
|
|
|
927
970
|
max_batch_rows: Optional[int],
|
|
928
971
|
cpu_requests: Optional[str],
|
|
929
972
|
memory_requests: Optional[str],
|
|
973
|
+
gpu_requests: Optional[str],
|
|
930
974
|
replicas: Optional[int],
|
|
931
975
|
statement_params: Optional[dict[str, Any]] = None,
|
|
932
976
|
) -> jobs.MLJob[Any]:
|
|
@@ -961,6 +1005,7 @@ class ServiceOperator:
|
|
|
961
1005
|
warehouse=warehouse,
|
|
962
1006
|
cpu=cpu_requests,
|
|
963
1007
|
memory=memory_requests,
|
|
1008
|
+
gpu=gpu_requests,
|
|
964
1009
|
replicas=replicas,
|
|
965
1010
|
)
|
|
966
1011
|
|
|
snowflake/ml/model/_client/service/model_deployment_spec.py
CHANGED
|
@@ -204,7 +204,7 @@ class ModelDeploymentSpec:
|
|
|
204
204
|
job_schema_name: Optional[sql_identifier.SqlIdentifier] = None,
|
|
205
205
|
cpu: Optional[str] = None,
|
|
206
206
|
memory: Optional[str] = None,
|
|
207
|
-
gpu: Optional[Union[str, int]] = None,
|
|
207
|
+
gpu: Optional[str] = None,
|
|
208
208
|
num_workers: Optional[int] = None,
|
|
209
209
|
max_batch_rows: Optional[int] = None,
|
|
210
210
|
replicas: Optional[int] = None,
|