snowflake-ml-python 1.15.0__py3-none-any.whl → 1.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/human_readable_id/adjectives.txt +5 -5
- snowflake/ml/_internal/human_readable_id/animals.txt +3 -3
- snowflake/ml/_internal/platform_capabilities.py +4 -0
- snowflake/ml/_internal/utils/mixins.py +24 -9
- snowflake/ml/experiment/experiment_tracking.py +63 -19
- snowflake/ml/jobs/__init__.py +4 -0
- snowflake/ml/jobs/_interop/__init__.py +0 -0
- snowflake/ml/jobs/_interop/data_utils.py +124 -0
- snowflake/ml/jobs/_interop/dto_schema.py +95 -0
- snowflake/ml/jobs/{_utils/interop_utils.py → _interop/exception_utils.py} +49 -178
- snowflake/ml/jobs/_interop/legacy.py +225 -0
- snowflake/ml/jobs/_interop/protocols.py +471 -0
- snowflake/ml/jobs/_interop/results.py +51 -0
- snowflake/ml/jobs/_interop/utils.py +144 -0
- snowflake/ml/jobs/_utils/constants.py +4 -1
- snowflake/ml/jobs/_utils/feature_flags.py +37 -5
- snowflake/ml/jobs/_utils/payload_utils.py +1 -1
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +139 -102
- snowflake/ml/jobs/_utils/spec_utils.py +50 -11
- snowflake/ml/jobs/_utils/types.py +10 -0
- snowflake/ml/jobs/job.py +168 -36
- snowflake/ml/jobs/manager.py +54 -36
- snowflake/ml/model/__init__.py +16 -2
- snowflake/ml/model/_client/model/batch_inference_specs.py +18 -2
- snowflake/ml/model/_client/model/model_version_impl.py +44 -7
- snowflake/ml/model/_client/ops/model_ops.py +4 -0
- snowflake/ml/model/_client/ops/service_ops.py +50 -5
- snowflake/ml/model/_client/service/model_deployment_spec.py +1 -1
- snowflake/ml/model/_client/sql/model_version.py +3 -1
- snowflake/ml/model/_client/sql/stage.py +8 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/model_method.py +32 -4
- snowflake/ml/model/_model_composer/model_method/utils.py +28 -0
- snowflake/ml/model/_packager/model_env/model_env.py +48 -21
- snowflake/ml/model/_packager/model_meta/model_meta.py +8 -0
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +1 -0
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +3 -3
- snowflake/ml/model/type_hints.py +13 -0
- snowflake/ml/model/volatility.py +34 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +1 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +1 -1
- snowflake/ml/modeling/cluster/birch.py +1 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +1 -1
- snowflake/ml/modeling/cluster/dbscan.py +1 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +1 -1
- snowflake/ml/modeling/cluster/k_means.py +1 -1
- snowflake/ml/modeling/cluster/mean_shift.py +1 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +1 -1
- snowflake/ml/modeling/cluster/optics.py +1 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +1 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +1 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +1 -1
- snowflake/ml/modeling/compose/column_transformer.py +1 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +1 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +1 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +1 -1
- snowflake/ml/modeling/covariance/oas.py +1 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +1 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +1 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +1 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +1 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +1 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +1 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +1 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +1 -1
- snowflake/ml/modeling/decomposition/pca.py +1 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +1 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +1 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +1 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +1 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +1 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +1 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +1 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +1 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +1 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +1 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +1 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +1 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +1 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +1 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +1 -1
- snowflake/ml/modeling/impute/knn_imputer.py +1 -1
- snowflake/ml/modeling/impute/missing_indicator.py +1 -1
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +1 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +1 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +1 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +1 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +1 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +1 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +1 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/lars.py +1 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +1 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +1 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/perceptron.py +1 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/ridge.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +1 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +1 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +1 -1
- snowflake/ml/modeling/manifold/isomap.py +1 -1
- snowflake/ml/modeling/manifold/mds.py +1 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +1 -1
- snowflake/ml/modeling/manifold/tsne.py +1 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +1 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +1 -1
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +1 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +1 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +1 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +1 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +1 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +1 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +1 -1
- snowflake/ml/modeling/preprocessing/polynomial_features.py +1 -1
- snowflake/ml/modeling/semi_supervised/label_propagation.py +1 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +1 -1
- snowflake/ml/modeling/svm/linear_svc.py +1 -1
- snowflake/ml/modeling/svm/linear_svr.py +1 -1
- snowflake/ml/modeling/svm/nu_svc.py +1 -1
- snowflake/ml/modeling/svm/nu_svr.py +1 -1
- snowflake/ml/modeling/svm/svc.py +1 -1
- snowflake/ml/modeling/svm/svr.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +1 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +1 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +1 -1
- snowflake/ml/registry/_manager/model_manager.py +1 -0
- snowflake/ml/registry/_manager/model_parameter_reconciler.py +27 -0
- snowflake/ml/registry/registry.py +15 -0
- snowflake/ml/utils/authentication.py +16 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.15.0.dist-info → snowflake_ml_python-1.17.0.dist-info}/METADATA +65 -5
- {snowflake_ml_python-1.15.0.dist-info → snowflake_ml_python-1.17.0.dist-info}/RECORD +201 -192
- {snowflake_ml_python-1.15.0.dist-info → snowflake_ml_python-1.17.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.15.0.dist-info → snowflake_ml_python-1.17.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.15.0.dist-info → snowflake_ml_python-1.17.0.dist-info}/top_level.txt +0 -0
|
@@ -1,19 +1,12 @@
|
|
|
1
1
|
import builtins
|
|
2
2
|
import functools
|
|
3
3
|
import importlib
|
|
4
|
-
import json
|
|
5
|
-
import os
|
|
6
|
-
import pickle
|
|
7
4
|
import re
|
|
8
5
|
import sys
|
|
9
6
|
import traceback
|
|
10
7
|
from collections import namedtuple
|
|
11
|
-
from dataclasses import dataclass
|
|
12
8
|
from types import TracebackType
|
|
13
|
-
from typing import Any, Callable, Optional,
|
|
14
|
-
|
|
15
|
-
from snowflake import snowpark
|
|
16
|
-
from snowflake.snowpark import exceptions as sp_exceptions
|
|
9
|
+
from typing import Any, Callable, Optional, cast
|
|
17
10
|
|
|
18
11
|
_TRACEBACK_ENTRY_PATTERN = re.compile(
|
|
19
12
|
r'File "(?P<filename>[^"]+)", line (?P<lineno>\d+), in (?P<name>[^\n]+)(?:\n(?!^\s*File)^\s*(?P<line>[^\n]+))?\n',
|
|
@@ -21,175 +14,46 @@ _TRACEBACK_ENTRY_PATTERN = re.compile(
|
|
|
21
14
|
)
|
|
22
15
|
_REMOTE_ERROR_ATTR_NAME = "_remote_error"
|
|
23
16
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
@dataclass(frozen=True)
|
|
28
|
-
class ExecutionResult:
|
|
29
|
-
result: Any = None
|
|
30
|
-
exception: Optional[BaseException] = None
|
|
31
|
-
|
|
32
|
-
@property
|
|
33
|
-
def success(self) -> bool:
|
|
34
|
-
return self.exception is None
|
|
35
|
-
|
|
36
|
-
def to_dict(self) -> dict[str, Any]:
|
|
37
|
-
"""Return the serializable dictionary."""
|
|
38
|
-
if isinstance(self.exception, BaseException):
|
|
39
|
-
exc_type = type(self.exception)
|
|
40
|
-
return {
|
|
41
|
-
"success": False,
|
|
42
|
-
"exc_type": f"{exc_type.__module__}.{exc_type.__name__}",
|
|
43
|
-
"exc_value": self.exception,
|
|
44
|
-
"exc_tb": "".join(traceback.format_tb(self.exception.__traceback__)),
|
|
45
|
-
}
|
|
46
|
-
return {
|
|
47
|
-
"success": True,
|
|
48
|
-
"result_type": type(self.result).__qualname__,
|
|
49
|
-
"result": self.result,
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
@classmethod
|
|
53
|
-
def from_dict(cls, result_dict: dict[str, Any]) -> "ExecutionResult":
|
|
54
|
-
if not isinstance(result_dict.get("success"), bool):
|
|
55
|
-
raise ValueError("Invalid result dictionary")
|
|
56
|
-
|
|
57
|
-
if result_dict["success"]:
|
|
58
|
-
# Load successful result
|
|
59
|
-
return cls(result=result_dict.get("result"))
|
|
60
|
-
|
|
61
|
-
# Load exception
|
|
62
|
-
exc_type = result_dict.get("exc_type", "RuntimeError")
|
|
63
|
-
exc_value = result_dict.get("exc_value", "Unknown error")
|
|
64
|
-
exc_tb = result_dict.get("exc_tb", "")
|
|
65
|
-
return cls(exception=load_exception(exc_type, exc_value, exc_tb))
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def fetch_result(session: snowpark.Session, result_path: str) -> ExecutionResult:
|
|
69
|
-
"""
|
|
70
|
-
Fetch the serialized result from the specified path.
|
|
17
|
+
RemoteErrorInfo = namedtuple("RemoteErrorInfo", ["exc_type", "exc_msg", "exc_tb"])
|
|
71
18
|
|
|
72
|
-
Args:
|
|
73
|
-
session: Snowpark Session to use for file operations.
|
|
74
|
-
result_path: The path to the serialized result file.
|
|
75
19
|
|
|
76
|
-
|
|
77
|
-
|
|
20
|
+
class RemoteError(RuntimeError):
|
|
21
|
+
"""Base exception for errors from remote execution environment which could not be reconstructed locally."""
|
|
78
22
|
|
|
79
|
-
Raises:
|
|
80
|
-
RuntimeError: If both pickle and JSON result retrieval fail.
|
|
81
|
-
"""
|
|
82
|
-
try:
|
|
83
|
-
# TODO: Check if file exists
|
|
84
|
-
with session.file.get_stream(result_path) as result_stream:
|
|
85
|
-
return ExecutionResult.from_dict(pickle.load(result_stream))
|
|
86
|
-
except (
|
|
87
|
-
sp_exceptions.SnowparkSQLException,
|
|
88
|
-
pickle.UnpicklingError,
|
|
89
|
-
TypeError,
|
|
90
|
-
ImportError,
|
|
91
|
-
AttributeError,
|
|
92
|
-
MemoryError,
|
|
93
|
-
) as pickle_error:
|
|
94
|
-
# Fall back to JSON result if loading pickled result fails for any reason
|
|
95
|
-
try:
|
|
96
|
-
result_json_path = os.path.splitext(result_path)[0] + ".json"
|
|
97
|
-
with session.file.get_stream(result_json_path) as result_stream:
|
|
98
|
-
return ExecutionResult.from_dict(json.load(result_stream))
|
|
99
|
-
except Exception as json_error:
|
|
100
|
-
# Both pickle and JSON failed - provide helpful error message
|
|
101
|
-
raise RuntimeError(_fetch_result_error_message(pickle_error, result_path, json_error)) from pickle_error
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
def _fetch_result_error_message(error: Exception, result_path: str, json_error: Optional[Exception] = None) -> str:
|
|
105
|
-
"""Create helpful error messages for common result retrieval failures."""
|
|
106
|
-
|
|
107
|
-
# Package import issues
|
|
108
|
-
if isinstance(error, ImportError):
|
|
109
|
-
return f"Failed to retrieve job result: Package not installed in your local environment. Error: {str(error)}"
|
|
110
|
-
|
|
111
|
-
# Package versions differ between runtime and local environment
|
|
112
|
-
if isinstance(error, AttributeError):
|
|
113
|
-
return f"Failed to retrieve job result: Package version mismatch. Error: {str(error)}"
|
|
114
|
-
|
|
115
|
-
# Serialization issues
|
|
116
|
-
if isinstance(error, TypeError):
|
|
117
|
-
return f"Failed to retrieve job result: Non-serializable objects were returned. Error: {str(error)}"
|
|
118
|
-
|
|
119
|
-
# Python version pickling incompatibility
|
|
120
|
-
if isinstance(error, pickle.UnpicklingError) and "protocol" in str(error).lower():
|
|
121
|
-
# TODO: Update this once we support different Python versions
|
|
122
|
-
client_version = f"Python {sys.version_info.major}.{sys.version_info.minor}"
|
|
123
|
-
runtime_version = "Python 3.10"
|
|
124
|
-
return (
|
|
125
|
-
f"Failed to retrieve job result: Python version mismatch - job ran on {runtime_version}, "
|
|
126
|
-
f"local environment using Python {client_version}. Error: {str(error)}"
|
|
127
|
-
)
|
|
128
23
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
return f"Failed to retrieve job result: Result too large for memory. Error: {str(error)}"
|
|
141
|
-
|
|
142
|
-
# Generic fallback
|
|
143
|
-
base_message = f"Failed to retrieve job result: {str(error)}"
|
|
144
|
-
if json_error:
|
|
145
|
-
base_message += f" (JSON fallback also failed: {str(json_error)})"
|
|
146
|
-
return base_message
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
def load_exception(exc_type_name: str, exc_value: Union[Exception, str], exc_tb: str) -> Exception:
|
|
150
|
-
"""
|
|
151
|
-
Create an exception with a string-formatted traceback.
|
|
152
|
-
|
|
153
|
-
When this exception is raised and not caught, it will display the original traceback.
|
|
154
|
-
When caught, it behaves like a regular exception without showing the traceback.
|
|
155
|
-
|
|
156
|
-
Args:
|
|
157
|
-
exc_type_name: Name of the exception type (e.g., 'ValueError', 'RuntimeError')
|
|
158
|
-
exc_value: The deserialized exception value or exception string (i.e. message)
|
|
159
|
-
exc_tb: String representation of the traceback
|
|
24
|
+
def build_exception(type_str: str, message: str, traceback: str, original_repr: Optional[str] = None) -> BaseException:
|
|
25
|
+
"""Build an exception from metadata, attaching remote error info."""
|
|
26
|
+
if not original_repr:
|
|
27
|
+
original_repr = f"{type_str}('{message}')"
|
|
28
|
+
try:
|
|
29
|
+
ex = reconstruct_exception(type_str=type_str, message=message)
|
|
30
|
+
except Exception as e:
|
|
31
|
+
# Fallback to a generic error type if reconstruction fails
|
|
32
|
+
ex = RemoteError(original_repr)
|
|
33
|
+
ex.__cause__ = e
|
|
34
|
+
return attach_remote_error_info(ex, type_str, message, traceback)
|
|
160
35
|
|
|
161
|
-
Returns:
|
|
162
|
-
An exception object with the original traceback information
|
|
163
36
|
|
|
164
|
-
|
|
165
|
-
"""
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
exc_type = getattr(module, class_name)
|
|
178
|
-
if exc_type is None or not issubclass(exc_type, Exception):
|
|
179
|
-
raise TypeError(f"{exc_type_name} is not a known exception type")
|
|
180
|
-
# Create the exception instance
|
|
181
|
-
exception = exc_type(exc_value)
|
|
182
|
-
except (ImportError, AttributeError, TypeError):
|
|
183
|
-
# Fall back to a generic exception
|
|
184
|
-
exception = RuntimeError(
|
|
185
|
-
f"Exception deserialization failed, original exception: {exc_type_name}: {exc_value}"
|
|
186
|
-
)
|
|
37
|
+
def reconstruct_exception(type_str: str, message: str) -> BaseException:
|
|
38
|
+
"""Best effort reconstruction of an exception from metadata."""
|
|
39
|
+
try:
|
|
40
|
+
type_split = type_str.rsplit(".", 1)
|
|
41
|
+
if len(type_split) == 1:
|
|
42
|
+
module = builtins
|
|
43
|
+
else:
|
|
44
|
+
module = importlib.import_module(type_split[0])
|
|
45
|
+
exc_type = getattr(module, type_split[-1])
|
|
46
|
+
except (ImportError, AttributeError):
|
|
47
|
+
raise ModuleNotFoundError(
|
|
48
|
+
f"Unrecognized exception type '{type_str}', likely due to a missing or unavailable package"
|
|
49
|
+
) from None
|
|
187
50
|
|
|
188
|
-
|
|
189
|
-
|
|
51
|
+
if not issubclass(exc_type, BaseException):
|
|
52
|
+
raise TypeError(f"Imported type {type_str} is not a known exception type, possibly due to a name conflict")
|
|
53
|
+
return cast(BaseException, exc_type(message))
|
|
190
54
|
|
|
191
55
|
|
|
192
|
-
def
|
|
56
|
+
def attach_remote_error_info(ex: BaseException, exc_type: str, exc_msg: str, traceback_str: str) -> BaseException:
|
|
193
57
|
"""
|
|
194
58
|
Attach a string-formatted traceback to an exception.
|
|
195
59
|
|
|
@@ -207,11 +71,11 @@ def _attach_remote_error_info(ex: Exception, exc_type: str, exc_msg: str, traceb
|
|
|
207
71
|
"""
|
|
208
72
|
# Store the traceback information
|
|
209
73
|
exc_type = exc_type.rsplit(".", 1)[-1] # Remove module path
|
|
210
|
-
setattr(ex, _REMOTE_ERROR_ATTR_NAME,
|
|
74
|
+
setattr(ex, _REMOTE_ERROR_ATTR_NAME, RemoteErrorInfo(exc_type=exc_type, exc_msg=exc_msg, exc_tb=traceback_str))
|
|
211
75
|
return ex
|
|
212
76
|
|
|
213
77
|
|
|
214
|
-
def
|
|
78
|
+
def retrieve_remote_error_info(ex: Optional[BaseException]) -> Optional[RemoteErrorInfo]:
|
|
215
79
|
"""
|
|
216
80
|
Retrieve the string-formatted traceback from an exception if it exists.
|
|
217
81
|
|
|
@@ -285,7 +149,7 @@ def _install_sys_excepthook() -> None:
|
|
|
285
149
|
sys.excepthook is the global hook that Python calls when an unhandled exception occurs.
|
|
286
150
|
By default it prints the exception type, message and traceback to stderr.
|
|
287
151
|
|
|
288
|
-
We override sys.excepthook to intercept exceptions that contain our special
|
|
152
|
+
We override sys.excepthook to intercept exceptions that contain our special RemoteErrorInfo
|
|
289
153
|
attribute. These exceptions come from deserialized remote execution results and contain
|
|
290
154
|
the original traceback information from where they occurred.
|
|
291
155
|
|
|
@@ -327,7 +191,7 @@ def _install_sys_excepthook() -> None:
|
|
|
327
191
|
"\nDuring handling of the above exception, another exception occurred:\n", file=sys.stderr
|
|
328
192
|
)
|
|
329
193
|
|
|
330
|
-
if (remote_err :=
|
|
194
|
+
if (remote_err := retrieve_remote_error_info(exc_value)) and isinstance(remote_err, RemoteErrorInfo):
|
|
331
195
|
# Display stored traceback for deserialized exceptions
|
|
332
196
|
print("Traceback (from remote execution):", file=sys.stderr) # noqa: T201
|
|
333
197
|
print(remote_err.exc_tb, end="", file=sys.stderr) # noqa: T201
|
|
@@ -408,7 +272,7 @@ def _install_ipython_hook() -> bool:
|
|
|
408
272
|
tb_offset: Optional[int],
|
|
409
273
|
**kwargs: Any,
|
|
410
274
|
) -> list[list[str]]:
|
|
411
|
-
if (remote_err :=
|
|
275
|
+
if (remote_err := retrieve_remote_error_info(evalue)) and isinstance(remote_err, RemoteErrorInfo):
|
|
412
276
|
# Implementation forked from IPython.core.ultratb.VerboseTB.format_exception_as_a_whole
|
|
413
277
|
head = self.prepare_header(remote_err.exc_type, long_version=False).replace(
|
|
414
278
|
"(most recent call last)",
|
|
@@ -448,7 +312,7 @@ def _install_ipython_hook() -> bool:
|
|
|
448
312
|
tb_offset: Optional[int] = None,
|
|
449
313
|
**kwargs: Any,
|
|
450
314
|
) -> list[str]:
|
|
451
|
-
if (remote_err :=
|
|
315
|
+
if (remote_err := retrieve_remote_error_info(evalue)) and isinstance(remote_err, RemoteErrorInfo):
|
|
452
316
|
tb_list = [
|
|
453
317
|
(m.group("filename"), m.group("lineno"), m.group("name"), m.group("line"))
|
|
454
318
|
for m in re.finditer(_TRACEBACK_ENTRY_PATTERN, remote_err.exc_tb or "")
|
|
@@ -493,9 +357,16 @@ def _uninstall_ipython_hook() -> None:
|
|
|
493
357
|
|
|
494
358
|
|
|
495
359
|
def install_exception_display_hooks() -> None:
|
|
496
|
-
|
|
497
|
-
_install_sys_excepthook()
|
|
360
|
+
"""Install custom exception display hooks for improved remote error reporting.
|
|
498
361
|
|
|
362
|
+
This function should be called once during package initialization to set up
|
|
363
|
+
enhanced error handling for remote job execution errors. The hooks will:
|
|
499
364
|
|
|
500
|
-
|
|
501
|
-
|
|
365
|
+
- Display original remote tracebacks instead of local deserialization traces
|
|
366
|
+
- Work in both standard Python and IPython/Jupyter environments
|
|
367
|
+
- Safely fall back to original behavior if errors occur
|
|
368
|
+
|
|
369
|
+
Note: This function is idempotent and safe to call multiple times.
|
|
370
|
+
"""
|
|
371
|
+
if not _install_ipython_hook():
|
|
372
|
+
_install_sys_excepthook()
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""Legacy result serialization protocol support for ML Jobs.
|
|
2
|
+
|
|
3
|
+
This module provides backward compatibility with the result serialization protocol used by
|
|
4
|
+
mljob_launcher.py prior to snowflake-ml-python>=1.17.0
|
|
5
|
+
|
|
6
|
+
LEGACY PROTOCOL (v1):
|
|
7
|
+
---------------------
|
|
8
|
+
The old serialization protocol (save_mljob_result_v1 in mljob_launcher.py) worked as follows:
|
|
9
|
+
|
|
10
|
+
1. Results were stored in an ExecutionResult dataclass with two optional fields:
|
|
11
|
+
- result: Any = None # For successful executions
|
|
12
|
+
- exception: BaseException = None # For failed executions
|
|
13
|
+
|
|
14
|
+
2. The ExecutionResult was converted to a dictionary via to_dict():
|
|
15
|
+
Success case:
|
|
16
|
+
{"success": True, "result_type": <type qualname>, "result": <value>}
|
|
17
|
+
|
|
18
|
+
Failure case:
|
|
19
|
+
{"success": False, "exc_type": "<module>.<class>", "exc_value": <exception>,
|
|
20
|
+
"exc_tb": <formatted traceback string>}
|
|
21
|
+
|
|
22
|
+
3. The dictionary was serialized TWICE for fault tolerance:
|
|
23
|
+
- Primary: cloudpickle to .pkl file under output/mljob_result.pkl (supports complex Python objects)
|
|
24
|
+
- Fallback: JSON to .json file under output/mljob_result.json (for cross-version compatibility)
|
|
25
|
+
|
|
26
|
+
WHY THIS MODULE EXISTS:
|
|
27
|
+
-----------------------
|
|
28
|
+
Jobs submitted with client versions using the v1 protocol will write v1-format result files.
|
|
29
|
+
This module ensures that newer clients can still retrieve results from:
|
|
30
|
+
- Jobs submitted before the protocol change
|
|
31
|
+
- Jobs running in environments where snowflake.ml.jobs._interop is not available
|
|
32
|
+
(triggering the ImportError fallback to v1 in save_mljob_result)
|
|
33
|
+
|
|
34
|
+
RETRIEVAL FLOW:
|
|
35
|
+
---------------
|
|
36
|
+
fetch_result() implements the v1 retrieval logic:
|
|
37
|
+
1. Try to unpickle from .pkl file
|
|
38
|
+
2. On failure (version mismatch, missing imports, etc.), fall back to .json file
|
|
39
|
+
3. Convert the legacy dict format to ExecutionResult
|
|
40
|
+
4. Provide helpful error messages for common failure modes
|
|
41
|
+
|
|
42
|
+
REMOVAL IMPLICATIONS:
|
|
43
|
+
---------------------
|
|
44
|
+
Removing this module would break result retrieval for:
|
|
45
|
+
- Any jobs that were submitted with snowflake-ml-python<1.17.0 and are still running/completed
|
|
46
|
+
- Any jobs running in old runtime environments that fall back to v1 serialization
|
|
47
|
+
|
|
48
|
+
Safe to remove when:
|
|
49
|
+
- All ML Runtime images have been updated to include the new _interop modules
|
|
50
|
+
- Sufficient time has passed that no jobs using the old protocol are still retrievable
|
|
51
|
+
(consider retention policies for job history/logs)
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
import json
|
|
55
|
+
import os
|
|
56
|
+
import pickle
|
|
57
|
+
import sys
|
|
58
|
+
import traceback
|
|
59
|
+
from dataclasses import dataclass
|
|
60
|
+
from typing import Any, Optional, Union
|
|
61
|
+
|
|
62
|
+
from snowflake import snowpark
|
|
63
|
+
from snowflake.ml.jobs._interop import exception_utils, results
|
|
64
|
+
from snowflake.snowpark import exceptions as sp_exceptions
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass(frozen=True)
|
|
68
|
+
class ExecutionResult:
|
|
69
|
+
result: Any = None
|
|
70
|
+
exception: Optional[BaseException] = None
|
|
71
|
+
|
|
72
|
+
@property
|
|
73
|
+
def success(self) -> bool:
|
|
74
|
+
return self.exception is None
|
|
75
|
+
|
|
76
|
+
def to_dict(self) -> dict[str, Any]:
|
|
77
|
+
"""Return the serializable dictionary."""
|
|
78
|
+
if isinstance(self.exception, BaseException):
|
|
79
|
+
exc_type = type(self.exception)
|
|
80
|
+
return {
|
|
81
|
+
"success": False,
|
|
82
|
+
"exc_type": f"{exc_type.__module__}.{exc_type.__name__}",
|
|
83
|
+
"exc_value": self.exception,
|
|
84
|
+
"exc_tb": "".join(traceback.format_tb(self.exception.__traceback__)),
|
|
85
|
+
}
|
|
86
|
+
return {
|
|
87
|
+
"success": True,
|
|
88
|
+
"result_type": type(self.result).__qualname__,
|
|
89
|
+
"result": self.result,
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
@classmethod
|
|
93
|
+
def from_dict(cls, result_dict: dict[str, Any]) -> "ExecutionResult":
|
|
94
|
+
if not isinstance(result_dict.get("success"), bool):
|
|
95
|
+
raise ValueError("Invalid result dictionary")
|
|
96
|
+
|
|
97
|
+
if result_dict["success"]:
|
|
98
|
+
# Load successful result
|
|
99
|
+
return cls(result=result_dict.get("result"))
|
|
100
|
+
|
|
101
|
+
# Load exception
|
|
102
|
+
exc_type = result_dict.get("exc_type", "RuntimeError")
|
|
103
|
+
exc_value = result_dict.get("exc_value", "Unknown error")
|
|
104
|
+
exc_tb = result_dict.get("exc_tb", "")
|
|
105
|
+
return cls(exception=load_exception(exc_type, exc_value, exc_tb))
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def fetch_result(
    session: snowpark.Session, result_path: str, result_json: Optional[dict[str, Any]] = None
) -> ExecutionResult:
    """
    Fetch the serialized result from the specified path.

    Tries the pickled result first; if loading it fails for any of the known
    reasons (missing file, env/version mismatch, unpicklable payload, result
    too large), falls back to the JSON sidecar (``<result_path>`` with a
    ``.json`` extension) or the pre-loaded ``result_json`` if one was provided.

    Args:
        session: Snowpark Session to use for file operations.
        result_path: The path to the serialized result file.
        result_json: Optional pre-loaded JSON result dictionary to use instead of fetching from file.

    Returns:
        The deserialized ExecutionResult (wrapping either the job's return
        value or the exception it raised).

    Raises:
        RuntimeError: If both pickle and JSON result retrieval fail.
    """
    try:
        with session.file.get_stream(result_path) as result_stream:
            return ExecutionResult.from_dict(pickle.load(result_stream))
    except (
        sp_exceptions.SnowparkSQLException,
        pickle.UnpicklingError,
        TypeError,
        ImportError,
        AttributeError,
        MemoryError,
    ) as pickle_error:
        # Fall back to JSON result if loading pickled result fails for any reason
        try:
            if result_json is None:
                result_json_path = os.path.splitext(result_path)[0] + ".json"
                with session.file.get_stream(result_json_path) as result_stream:
                    result_json = json.load(result_stream)
            return ExecutionResult.from_dict(result_json)
        except Exception as json_error:
            # Both pickle and JSON failed - provide helpful error message
            raise RuntimeError(_fetch_result_error_message(pickle_error, result_path, json_error)) from pickle_error
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _fetch_result_error_message(error: Exception, result_path: str, json_error: Optional[Exception] = None) -> str:
|
|
149
|
+
"""Create helpful error messages for common result retrieval failures."""
|
|
150
|
+
|
|
151
|
+
# Package import issues
|
|
152
|
+
if isinstance(error, ImportError):
|
|
153
|
+
return f"Failed to retrieve job result: Package not installed in your local environment. Error: {str(error)}"
|
|
154
|
+
|
|
155
|
+
# Package versions differ between runtime and local environment
|
|
156
|
+
if isinstance(error, AttributeError):
|
|
157
|
+
return f"Failed to retrieve job result: Package version mismatch. Error: {str(error)}"
|
|
158
|
+
|
|
159
|
+
# Serialization issues
|
|
160
|
+
if isinstance(error, TypeError):
|
|
161
|
+
return f"Failed to retrieve job result: Non-serializable objects were returned. Error: {str(error)}"
|
|
162
|
+
|
|
163
|
+
# Python version pickling incompatibility
|
|
164
|
+
if isinstance(error, pickle.UnpicklingError) and "protocol" in str(error).lower():
|
|
165
|
+
client_version = f"Python {sys.version_info.major}.{sys.version_info.minor}"
|
|
166
|
+
runtime_version = "Python 3.10" # NOTE: This may be inaccurate, but this path isn't maintained anymore
|
|
167
|
+
return (
|
|
168
|
+
f"Failed to retrieve job result: Python version mismatch - job ran on {runtime_version}, "
|
|
169
|
+
f"local environment using Python {client_version}. Error: {str(error)}"
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
# File access issues
|
|
173
|
+
if isinstance(error, sp_exceptions.SnowparkSQLException):
|
|
174
|
+
if "not found" in str(error).lower() or "does not exist" in str(error).lower():
|
|
175
|
+
return (
|
|
176
|
+
f"Failed to retrieve job result: No result file found. Check job.get_logs() for execution "
|
|
177
|
+
f"errors. Error: {str(error)}"
|
|
178
|
+
)
|
|
179
|
+
else:
|
|
180
|
+
return f"Failed to retrieve job result: Cannot access result file. Error: {str(error)}"
|
|
181
|
+
|
|
182
|
+
if isinstance(error, MemoryError):
|
|
183
|
+
return f"Failed to retrieve job result: Result too large for memory. Error: {str(error)}"
|
|
184
|
+
|
|
185
|
+
# Generic fallback
|
|
186
|
+
base_message = f"Failed to retrieve job result: {str(error)}"
|
|
187
|
+
if json_error:
|
|
188
|
+
base_message += f" (JSON fallback also failed: {str(json_error)})"
|
|
189
|
+
return base_message
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def load_exception(exc_type_name: str, exc_value: Union[Exception, str], exc_tb: str) -> BaseException:
    """
    Create an exception carrying a string-formatted remote traceback.

    If raised and left uncaught, the returned exception surfaces the original
    (remote) traceback; when caught, it behaves like an ordinary exception.

    Args:
        exc_type_name: Name of the exception type (e.g., 'ValueError', 'RuntimeError')
        exc_value: The deserialized exception value or exception string (i.e. message)
        exc_tb: String representation of the traceback

    Returns:
        An exception object with the original traceback information

    # noqa: DAR401
    """
    if not isinstance(exc_value, Exception):
        # Only a message string survived serialization; rebuild the exception.
        return exception_utils.build_exception(exc_type_name, str(exc_value), exc_tb)
    # The original exception deserialized cleanly; just attach the remote info.
    return exception_utils.attach_remote_error_info(exc_value, exc_type_name, str(exc_value), exc_tb)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def load_legacy_result(
    session: snowpark.Session, result_path: str, result_json: Optional[dict[str, Any]] = None
) -> results.ExecutionResult:
    """Load a result written by the legacy interop format and adapt it to the new result type."""
    # Load result using legacy interop
    loaded = fetch_result(session, result_path, result_json=result_json)

    # Adapt legacy result to new result: on failure carry the exception,
    # otherwise carry the returned value.
    value = loaded.exception or loaded.result
    return results.ExecutionResult(success=loaded.success, value=value)
|