snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +2 -1
- snowflake/ml/_internal/file_utils.py +35 -40
- snowflake/ml/_internal/telemetry.py +5 -8
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/_internal/utils/uri.py +7 -2
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
- snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
- snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
- snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
- snowflake/ml/model/_deployer.py +14 -27
- snowflake/ml/model/_env.py +4 -4
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/custom.py +14 -2
- snowflake/ml/model/_handlers/pytorch.py +186 -0
- snowflake/ml/model/_handlers/sklearn.py +14 -8
- snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
- snowflake/ml/model/_handlers/torchscript.py +180 -0
- snowflake/ml/model/_handlers/xgboost.py +19 -9
- snowflake/ml/model/_model.py +27 -21
- snowflake/ml/model/_model_meta.py +33 -19
- snowflake/ml/model/model_signature.py +446 -66
- snowflake/ml/model/type_hints.py +28 -15
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
- snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
- snowflake/ml/modeling/cluster/birch.py +79 -43
- snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
- snowflake/ml/modeling/cluster/dbscan.py +79 -43
- snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
- snowflake/ml/modeling/cluster/k_means.py +79 -43
- snowflake/ml/modeling/cluster/mean_shift.py +79 -43
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
- snowflake/ml/modeling/cluster/optics.py +79 -43
- snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
- snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
- snowflake/ml/modeling/compose/column_transformer.py +79 -43
- snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
- snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
- snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
- snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
- snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
- snowflake/ml/modeling/covariance/oas.py +79 -43
- snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
- snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
- snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
- snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
- snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/pca.py +79 -43
- snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
- snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
- snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
- snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
- snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
- snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
- snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
- snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
- snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
- snowflake/ml/modeling/impute/knn_imputer.py +79 -43
- snowflake/ml/modeling/impute/missing_indicator.py +79 -43
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/lars.py +79 -43
- snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
- snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/perceptron.py +79 -43
- snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/ridge.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
- snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
- snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
- snowflake/ml/modeling/manifold/isomap.py +79 -43
- snowflake/ml/modeling/manifold/mds.py +79 -43
- snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
- snowflake/ml/modeling/manifold/tsne.py +79 -43
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
- snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
- snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
- snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
- snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
- snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
- snowflake/ml/modeling/svm/linear_svc.py +79 -43
- snowflake/ml/modeling/svm/linear_svr.py +79 -43
- snowflake/ml/modeling/svm/nu_svc.py +79 -43
- snowflake/ml/modeling/svm/nu_svr.py +79 -43
- snowflake/ml/modeling/svm/svc.py +79 -43
- snowflake/ml/modeling/svm/svr.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
- snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
- snowflake/ml/registry/model_registry.py +123 -121
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
- snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -310,7 +310,8 @@ def validate_requirements_in_snowflake_conda_channel(
|
|
310
310
|
FROM information_schema.packages
|
311
311
|
WHERE ({pkg_names_str})
|
312
312
|
AND language = 'python'
|
313
|
-
AND runtime_version = '{parsed_python_version.major}.{parsed_python_version.minor}'
|
313
|
+
AND (runtime_version = '{parsed_python_version.major}.{parsed_python_version.minor}'
|
314
|
+
OR runtime_version is null);
|
314
315
|
"""
|
315
316
|
)
|
316
317
|
else:
|
@@ -1,15 +1,13 @@
|
|
1
1
|
import contextlib
|
2
2
|
import hashlib
|
3
|
-
import importlib
|
4
3
|
import io
|
5
4
|
import os
|
6
5
|
import pathlib
|
6
|
+
import pkgutil
|
7
7
|
import shutil
|
8
8
|
import tempfile
|
9
9
|
import zipfile
|
10
|
-
from typing import IO, Generator,
|
11
|
-
|
12
|
-
from snowflake.snowpark import session as snowpark_session
|
10
|
+
from typing import IO, Generator, List, Optional, Union
|
13
11
|
|
14
12
|
GENERATED_PY_FILE_EXT = (".pyc", ".pyo", ".pyd", ".pyi")
|
15
13
|
|
@@ -61,10 +59,12 @@ def zip_file_or_directory_to_stream(
|
|
61
59
|
Raises:
|
62
60
|
FileNotFoundError: Raised when the given path does not exist.
|
63
61
|
ValueError: Raised when the leading path is not a actual leading path of path
|
62
|
+
ValueError: Raised when the arcname cannot be encoded using ASCII.
|
64
63
|
|
65
64
|
Yields:
|
66
65
|
A bytes IO stream containing the zip file.
|
67
66
|
"""
|
67
|
+
# TODO(SNOW-862576): Should remove check on ASCII encoding after SNOW-862576 fixed.
|
68
68
|
if not os.path.exists(path):
|
69
69
|
raise FileNotFoundError(f"{path} is not found")
|
70
70
|
if leading_path and not path.startswith(leading_path):
|
@@ -78,23 +78,35 @@ def zip_file_or_directory_to_stream(
|
|
78
78
|
if os.path.realpath(path) != os.path.realpath(start_path):
|
79
79
|
cur_path = os.path.dirname(path)
|
80
80
|
while os.path.realpath(cur_path) != os.path.realpath(start_path):
|
81
|
-
|
81
|
+
arcname = os.path.relpath(cur_path, start_path)
|
82
|
+
if not _able_ascii_encode(arcname):
|
83
|
+
raise ValueError(f"File name {arcname} cannot be encoded using ASCII. Please rename.")
|
84
|
+
zf.write(cur_path, arcname)
|
82
85
|
cur_path = os.path.dirname(cur_path)
|
83
86
|
|
84
87
|
if os.path.isdir(path):
|
85
|
-
for
|
88
|
+
for dirpath, _, files in os.walk(path):
|
86
89
|
# ignore __pycache__
|
87
|
-
if ignore_generated_py_file and "__pycache__" in
|
90
|
+
if ignore_generated_py_file and "__pycache__" in dirpath:
|
88
91
|
continue
|
89
|
-
|
92
|
+
arcname = os.path.relpath(dirpath, start_path)
|
93
|
+
if not _able_ascii_encode(arcname):
|
94
|
+
raise ValueError(f"File name {arcname} cannot be encoded using ASCII. Please rename.")
|
95
|
+
zf.write(dirpath, arcname)
|
90
96
|
for file in files:
|
91
97
|
# ignore generated python files
|
92
98
|
if ignore_generated_py_file and file.endswith(GENERATED_PY_FILE_EXT):
|
93
99
|
continue
|
94
|
-
|
95
|
-
|
100
|
+
file_path = os.path.join(dirpath, file)
|
101
|
+
arcname = os.path.relpath(file_path, start_path)
|
102
|
+
if not _able_ascii_encode(arcname):
|
103
|
+
raise ValueError(f"File name {arcname} cannot be encoded using ASCII. Please rename.")
|
104
|
+
zf.write(file_path, arcname)
|
96
105
|
else:
|
97
|
-
|
106
|
+
arcname = os.path.relpath(path, start_path)
|
107
|
+
if not _able_ascii_encode(arcname):
|
108
|
+
raise ValueError(f"File name {arcname} cannot be encoded using ASCII. Please rename.")
|
109
|
+
zf.write(path, arcname)
|
98
110
|
|
99
111
|
yield input_stream
|
100
112
|
|
@@ -116,19 +128,6 @@ def unzip_stream_in_temp_dir(stream: IO[bytes], temp_root: Optional[str] = None)
|
|
116
128
|
yield tempdir
|
117
129
|
|
118
130
|
|
119
|
-
@contextlib.contextmanager
|
120
|
-
def zip_snowml() -> Generator[Tuple[io.BytesIO, str], None, None]:
|
121
|
-
"""Zip the snowflake-ml source code as a zip-file for import.
|
122
|
-
|
123
|
-
Yields:
|
124
|
-
A bytes IO stream containing the zip file.
|
125
|
-
"""
|
126
|
-
snowml_path = list(importlib.import_module("snowflake.ml").__path__)[0]
|
127
|
-
root_path = os.path.normpath(os.path.join(snowml_path, os.pardir, os.pardir))
|
128
|
-
with zip_file_or_directory_to_stream(snowml_path, root_path) as stream:
|
129
|
-
yield stream, hash_directory(snowml_path)
|
130
|
-
|
131
|
-
|
132
131
|
def hash_directory(directory: Union[str, pathlib.Path]) -> str:
|
133
132
|
"""Hash the **content** of a folder recursively using SHA-1.
|
134
133
|
|
@@ -154,21 +153,17 @@ def hash_directory(directory: Union[str, pathlib.Path]) -> str:
|
|
154
153
|
return _update_hash_from_dir(directory, hashlib.sha1()).hexdigest()
|
155
154
|
|
156
155
|
|
157
|
-
def
|
158
|
-
|
159
|
-
|
160
|
-
|
156
|
+
def get_all_modules(dirname: str, prefix: str = "") -> List[pkgutil.ModuleInfo]:
    """Collect module infos found in the sub-directories of ``dirname``, recursively.

    Modules located directly inside ``dirname`` are not reported; only the contents of its
    sub-directories (and, through recursion, of their sub-directories) are scanned.

    Args:
        dirname: Directory whose immediate sub-directories are searched for modules.
        prefix: String prepended to the name of every module reported at this level.

    Returns:
        ModuleInfo entries for the modules in each immediate sub-directory, followed by the
        entries gathered by recursing into those sub-directories.
    """
    child_dirs = [entry.path for entry in os.scandir(dirname) if entry.is_dir()]
    found = list(pkgutil.iter_modules(child_dirs, prefix=prefix))
    for child_dir in child_dirs:
        # NOTE(review): child_dir is the filesystem path returned by os.scandir, so the prefix
        # handed to the recursive call is path-based rather than a dotted module name, and it
        # carries no trailing separator before the module name. Confirm with callers whether
        # that is intended before changing it.
        child_prefix = f"{prefix}.{child_dir}" if prefix else child_dir
        found.extend(get_all_modules(child_dir, prefix=child_prefix))
    return found
|
161
162
|
|
162
|
-
Args:
|
163
|
-
session: Snowpark connection session.
|
164
|
-
stage_location: The path to the stage location where the uploaded SnowML should be. Defaults to None.
|
165
163
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
file_location = os.path.join(stage_location, f"snowml_{hash_str}.zip")
|
173
|
-
session.file.put_stream(stream, stage_location=file_location, auto_compress=False, overwrite=False)
|
174
|
-
return file_location
|
164
|
+
def _able_ascii_encode(s: str) -> bool:
    """Tell whether ``s`` can be encoded with the ASCII codec.

    Args:
        s: The string to check.

    Returns:
        True when strict ASCII encoding of ``s`` succeeds, False when it raises
        UnicodeEncodeError.
    """
    try:
        s.encode("ascii", errors="strict")
    except UnicodeEncodeError:
        return False
    else:
        return True
|
@@ -6,7 +6,6 @@ import enum
|
|
6
6
|
import functools
|
7
7
|
import inspect
|
8
8
|
import operator
|
9
|
-
import threading
|
10
9
|
import types
|
11
10
|
from typing import (
|
12
11
|
Any,
|
@@ -29,7 +28,6 @@ from snowflake.ml._internal import env
|
|
29
28
|
from snowflake.snowpark import dataframe, exceptions, session
|
30
29
|
from snowflake.snowpark._internal import utils
|
31
30
|
|
32
|
-
_rlock = threading.RLock()
|
33
31
|
_log_counter = 0
|
34
32
|
_FLUSH_SIZE = 10
|
35
33
|
|
@@ -308,12 +306,11 @@ def send_api_usage_telemetry(
|
|
308
306
|
return res
|
309
307
|
finally:
|
310
308
|
telemetry.send_function_usage_telemetry(**telemetry_args)
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
_log_counter = 0
|
309
|
+
global _log_counter
|
310
|
+
_log_counter += 1
|
311
|
+
if _log_counter >= _FLUSH_SIZE or "error" in telemetry_args:
|
312
|
+
telemetry.send_batch()
|
313
|
+
_log_counter = 0
|
317
314
|
|
318
315
|
return cast(Callable[_Args, _ReturnValue], wrap)
|
319
316
|
|
@@ -4,14 +4,19 @@ from typing import Any, List, Optional, Tuple, Union, overload
|
|
4
4
|
from snowflake.snowpark._internal.analyzer import analyzer_utils
|
5
5
|
|
6
6
|
# Snowflake Identifier Regex. See https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html.
|
7
|
-
|
7
|
+
_SF_UNQUOTED_CASE_INSENSITIVE_IDENTIFIER = "[A-Za-z_][A-Za-z0-9_$]*"
|
8
|
+
_SF_UNQUOTED_CASE_SENSITIVE_IDENTIFIER = "[A-Z_][A-Z0-9_$]*"
|
8
9
|
SF_QUOTED_IDENTIFIER = '"(?:[^"]|"")*"'
|
9
|
-
_SF_IDENTIFIER = f"({
|
10
|
+
_SF_IDENTIFIER = f"({_SF_UNQUOTED_CASE_INSENSITIVE_IDENTIFIER}|{SF_QUOTED_IDENTIFIER})"
|
10
11
|
_SF_SCHEMA_LEVEL_OBJECT = rf"{_SF_IDENTIFIER}\.{_SF_IDENTIFIER}\.{_SF_IDENTIFIER}(.*)"
|
11
12
|
_SF_SCHEMA_LEVEL_OBJECT_RE = re.compile(_SF_SCHEMA_LEVEL_OBJECT)
|
12
13
|
|
13
|
-
UNQUOTED_CASE_INSENSITIVE_RE = re.compile(f"^({
|
14
|
+
UNQUOTED_CASE_INSENSITIVE_RE = re.compile(f"^({_SF_UNQUOTED_CASE_INSENSITIVE_IDENTIFIER})$")
|
15
|
+
UNQUOTED_CASE_SENSITIVE_RE = re.compile(f"^({_SF_UNQUOTED_CASE_SENSITIVE_IDENTIFIER})$")
|
14
16
|
QUOTED_IDENTIFIER_RE = re.compile(f"^({SF_QUOTED_IDENTIFIER})$")
|
17
|
+
DOUBLE_QUOTE = '"'
|
18
|
+
|
19
|
+
quote_name_without_upper_casing = analyzer_utils.quote_name_without_upper_casing
|
15
20
|
|
16
21
|
|
17
22
|
def _is_quoted(id: str) -> bool:
|
@@ -61,10 +66,47 @@ def _get_unescaped_name(id: str) -> str:
|
|
61
66
|
if not _is_quoted(id):
|
62
67
|
return id.upper()
|
63
68
|
unquoted_id = id[1:-1]
|
64
|
-
return unquoted_id.replace(
|
69
|
+
return unquoted_id.replace(DOUBLE_QUOTE + DOUBLE_QUOTE, DOUBLE_QUOTE)
|
65
70
|
|
66
71
|
|
67
|
-
|
72
|
+
def _get_escaped_name(id: str) -> str:
    """Wrap ``id`` in double quotes, doubling every double quote it already contains.

    NOTE: See note in :meth:`_is_quoted`.

    Args:
        id: The string to be checked & treated.

    Returns:
        ``id`` with each existing double quote replaced by two double quotes, enclosed in a
        pair of double quotes.
    """
    return f"{DOUBLE_QUOTE}{id.replace(DOUBLE_QUOTE, DOUBLE_QUOTE + DOUBLE_QUOTE)}{DOUBLE_QUOTE}"
|
86
|
+
|
87
|
+
|
88
|
+
def get_inferred_name(id: str) -> str:
    """Return ``id`` unchanged when it is a safe unquoted identifier, otherwise double-quote it.

    The identifier is returned as-is only when the whole string matches
    UNQUOTED_CASE_SENSITIVE_RE. Anything else is wrapped in double quotes, with embedded
    double quotes doubled, by _get_escaped_name.

    Examples (a backslash below only escapes the quote inside this docstring):
        COL1   -> COL1
        1COL   -> "1COL"
        Col    -> "Col"
        "COL"  -> \"\"\"COL\"\"\"
        COL 1  -> "COL 1"

    Args:
        id: The string to be checked & treated.

    Returns:
        Double quoted identifier if necessary; unquoted string otherwise.
    """
    # fullmatch, not match: the pattern ends with "$", which also matches just before a
    # trailing newline, so match() would let an id such as "COL\n" through unquoted.
    if UNQUOTED_CASE_SENSITIVE_RE.fullmatch(id):
        return id
    # A single string is being escaped, so call the single-name helper directly instead of
    # going through the str-or-list front end and narrowing its result with an assert.
    return _get_escaped_name(id)
|
68
110
|
|
69
111
|
|
70
112
|
def concat_names(ids: List[str]) -> str:
|
@@ -89,7 +131,7 @@ def concat_names(ids: List[str]) -> str:
|
|
89
131
|
parts.append(id)
|
90
132
|
final_id = "".join(parts)
|
91
133
|
if quotes_needed:
|
92
|
-
return
|
134
|
+
return _get_escaped_name(final_id)
|
93
135
|
return final_id
|
94
136
|
|
95
137
|
|
@@ -135,7 +177,7 @@ def get_unescaped_names(ids: List[str]) -> List[str]:
|
|
135
177
|
|
136
178
|
def get_unescaped_names(ids: Optional[Union[str, List[str]]]) -> Optional[Union[str, List[str]]]:
|
137
179
|
"""Given a user provided identifier(s), this method will compute the equivalent column name identifier(s) in the
|
138
|
-
response pandas dataframe(i.e., in the
|
180
|
+
response pandas dataframe(i.e., in the response of snowpark_df.to_pandas()) using the rules defined here
|
139
181
|
https://docs.snowflake.com/en/sql-reference/identifiers-syntax.
|
140
182
|
|
141
183
|
Args:
|
@@ -156,3 +198,28 @@ def get_unescaped_names(ids: Optional[Union[str, List[str]]]) -> Optional[Union[
|
|
156
198
|
return _get_unescaped_name(ids)
|
157
199
|
else:
|
158
200
|
raise ValueError("Unsupported type. Only string or list of string are supported for selecting columns.")
|
201
|
+
|
202
|
+
|
203
|
+
def get_escaped_names(ids: Optional[Union[str, List[str]]]) -> Optional[Union[str, List[str]]]:
    """Double-quote user provided column name identifier(s) so they keep their exact spelling.

    Quoting preserves case-sensitivity and allows special characters in column names. See
    https://docs.snowflake.com/en/sql-reference/identifiers-syntax.

    Args:
        ids: User provided column name identifier(s): a single string, a list of strings,
            or None.

    Returns:
        None when ``ids`` is None; otherwise the double-quoted identifier, or a list of
        double-quoted identifiers in the same order as the input list.

    Raises:
        ValueError: if ``ids`` is neither None, a string, nor a list.
    """
    if ids is None:
        return None
    # isinstance (rather than an exact type() comparison) also accepts str/list subclasses.
    if isinstance(ids, str):
        return _get_escaped_name(ids)
    if isinstance(ids, list):
        return [_get_escaped_name(name) for name in ids]
    raise ValueError("Unsupported type. Only string or list of string are supported for selecting columns.")
|
@@ -1,4 +1,4 @@
|
|
1
|
-
import
|
1
|
+
import posixpath
|
2
2
|
from typing import Optional
|
3
3
|
from urllib.parse import ParseResult, urlparse, urlunparse
|
4
4
|
|
@@ -35,7 +35,12 @@ def get_snowflake_stage_path_from_uri(uri: str) -> Optional[str]:
|
|
35
35
|
if not is_snowflake_stage_uri(uri):
|
36
36
|
return None
|
37
37
|
uri_components = urlparse(uri)
|
38
|
-
|
38
|
+
# posixpath.join will drop other components if any of arguments is absolute path.
|
39
|
+
# The path we get is actually absolute (starting with '/'), however, since we concat them to stage location,
|
40
|
+
# it should not.
|
41
|
+
return posixpath.normpath(
|
42
|
+
posixpath.join(posixpath.normpath(uri_components.netloc), posixpath.normpath(uri_components.path.lstrip("/")))
|
43
|
+
)
|
39
44
|
|
40
45
|
|
41
46
|
def get_uri_scheme(uri: str) -> str:
|
@@ -1 +1 @@
|
|
1
|
-
REQUIREMENTS=['anyio>=3.5.0,<4', 'cloudpickle', 'numpy>=1.23,<2', 'packaging>=20.9,<24', 'pandas>=1.0.0,<2', 'pyyaml>=6.0,<7', '
|
1
|
+
REQUIREMENTS=['anyio>=3.5.0,<4', 'cloudpickle', 'numpy>=1.23,<2', 'packaging>=20.9,<24', 'pandas>=1.0.0,<2', 'pyyaml>=6.0,<7', 'snowflake-snowpark-python>=1.4.0,<2', 'typing-extensions>=4.1.0,<5']
|
@@ -0,0 +1,15 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
|
3
|
+
|
4
|
+
class ImageBuilder(ABC):
    """Interface for components that build an image and upload it to the model registry."""

    @abstractmethod
    def build_and_upload_image(self) -> str:
        """Build an image and upload it to the model registry.

        Returns:
            Full image path.
        """
|
@@ -0,0 +1,259 @@
|
|
1
|
+
import base64
|
2
|
+
import json
|
3
|
+
import logging
|
4
|
+
import os
|
5
|
+
import posixpath
|
6
|
+
import subprocess
|
7
|
+
import tempfile
|
8
|
+
import zipfile
|
9
|
+
from enum import Enum
|
10
|
+
from typing import List
|
11
|
+
|
12
|
+
import yaml
|
13
|
+
|
14
|
+
from snowflake import snowpark
|
15
|
+
from snowflake.ml._internal.utils import query_result_checker
|
16
|
+
from snowflake.ml.model._deploy_client.image_builds import (
|
17
|
+
base_image_builder,
|
18
|
+
docker_context,
|
19
|
+
)
|
20
|
+
from snowflake.ml.model._deploy_client.utils import constants
|
21
|
+
|
22
|
+
|
23
|
+
class Platform(Enum):
    """Target platform identifiers; each value is an "os/arch" string."""

    # The only platform declared here.
    LINUX_AMD64 = "linux/amd64"
|
25
|
+
|
26
|
+
|
27
|
+
class ClientImageBuilder(base_image_builder.ImageBuilder):
|
28
|
+
"""
|
29
|
+
Client-side image building and upload to model registry.
|
30
|
+
|
31
|
+
Usage requirements:
|
32
|
+
Requires prior installation and running of Docker with BuildKit. See installation instructions in
|
33
|
+
https://docs.docker.com/engine/install/
|
34
|
+
|
35
|
+
|
36
|
+
"""
|
37
|
+
|
38
|
+
def __init__(
|
39
|
+
self, *, id: str, image_repo: str, model_zip_stage_path: str, session: snowpark.Session, use_gpu: bool = False
|
40
|
+
) -> None:
|
41
|
+
"""Initialization
|
42
|
+
|
43
|
+
Args:
|
44
|
+
id: A hexadecimal string used for naming the image tag.
|
45
|
+
image_repo: Path to image repository.
|
46
|
+
model_zip_stage_path: Path to model zip file in stage.
|
47
|
+
use_gpu: Boolean flag for generating the CPU or GPU base image.
|
48
|
+
session: Snowpark session
|
49
|
+
"""
|
50
|
+
self.image_tag = "/".join([image_repo.rstrip("/"), id]) + ":latest"
|
51
|
+
self.image_repo = image_repo
|
52
|
+
self.model_zip_stage_path = model_zip_stage_path
|
53
|
+
self.use_gpu = use_gpu
|
54
|
+
self.session = session
|
55
|
+
|
56
|
+
def build_and_upload_image(self) -> str:
|
57
|
+
"""
|
58
|
+
Builds and uploads an image to the model registry.
|
59
|
+
"""
|
60
|
+
|
61
|
+
def _setup_docker_config(docker_config_dir: str) -> None:
|
62
|
+
"""Set up a temporary docker config, which is used for running all docker commands.
|
63
|
+
|
64
|
+
Args:
|
65
|
+
docker_config_dir: Path to docker configuration directory, which stores the temporary session token.
|
66
|
+
"""
|
67
|
+
ctx = self.session._conn._conn
|
68
|
+
assert ctx._rest, "SnowflakeRestful is not set in session"
|
69
|
+
token_data = ctx._rest._token_request("ISSUE")
|
70
|
+
snowpark_session_token = token_data["data"]["sessionToken"]
|
71
|
+
token_obj = {"token": snowpark_session_token}
|
72
|
+
credentials = f"0sessiontoken:{json.dumps(token_obj)}"
|
73
|
+
encoded_credentials = base64.b64encode(credentials.encode("utf-8")).decode("utf-8")
|
74
|
+
content = {"auths": {self.image_tag: {"auth": encoded_credentials}}}
|
75
|
+
config_path = os.path.join(docker_config_dir, "config.json")
|
76
|
+
with open(config_path, "w", encoding="utf-8") as file:
|
77
|
+
json.dump(content, file)
|
78
|
+
|
79
|
+
self.validate_docker_client_env()
|
80
|
+
|
81
|
+
query_result = (
|
82
|
+
query_result_checker.SqlResultValidator(
|
83
|
+
self.session,
|
84
|
+
query="SHOW PARAMETERS LIKE 'PYTHON_CONNECTOR_QUERY_RESULT_FORMAT' IN SESSION",
|
85
|
+
)
|
86
|
+
.has_dimensions(expected_rows=1)
|
87
|
+
.validate()
|
88
|
+
)
|
89
|
+
prev_format = query_result[0].value
|
90
|
+
|
91
|
+
with tempfile.TemporaryDirectory() as config_dir:
|
92
|
+
try:
|
93
|
+
# Workaround for SNOW-841699: Fail to authenticate to image registry with session token generated from
|
94
|
+
# Snowpark. Need to temporarily set the json query format in order to process GS token response.
|
95
|
+
self.session.sql("ALTER SESSION SET PYTHON_CONNECTOR_QUERY_RESULT_FORMAT = 'json'").collect()
|
96
|
+
_setup_docker_config(config_dir)
|
97
|
+
self._build(config_dir)
|
98
|
+
self._upload(config_dir)
|
99
|
+
finally:
|
100
|
+
self.session.sql(f"ALTER SESSION SET PYTHON_CONNECTOR_QUERY_RESULT_FORMAT = '{prev_format}'").collect()
|
101
|
+
commands = ["docker", "--config", config_dir, "rmi", self.image_tag]
|
102
|
+
logging.info(f"Removing local image: {self.image_tag}")
|
103
|
+
self._run_docker_commands(commands)
|
104
|
+
return self.image_tag
|
105
|
+
|
106
|
+
def validate_docker_client_env(self) -> None:
|
107
|
+
"""Ensure docker client is running and BuildKit is enabled. Note that Buildx always uses BuildKit.
|
108
|
+
- Ensure docker daemon is running through the "docker info" command on shell. When docker daemon is running,
|
109
|
+
return code will be 0, else return code will be 1.
|
110
|
+
- Ensure BuildKit is enabled by checking "docker buildx version".
|
111
|
+
|
112
|
+
Raises:
|
113
|
+
ConnectionError: Occurs when Docker is not installed or is not running.
|
114
|
+
|
115
|
+
"""
|
116
|
+
info_command = "docker info"
|
117
|
+
buildx_command = "docker buildx version"
|
118
|
+
|
119
|
+
try:
|
120
|
+
subprocess.check_call(info_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True)
|
121
|
+
except subprocess.CalledProcessError:
|
122
|
+
raise ConnectionError("Failed to initialize Docker client. Please ensure Docker is installed and running.")
|
123
|
+
|
124
|
+
try:
|
125
|
+
subprocess.check_call(buildx_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True)
|
126
|
+
except subprocess.CalledProcessError:
|
127
|
+
raise ConnectionError(
|
128
|
+
"Please ensured Docker is installed with BuildKit by following "
|
129
|
+
"https://docs.docker.com/build/buildkit/#getting-started"
|
130
|
+
)
|
131
|
+
|
132
|
+
def _extract_model_zip(self, context_dir: str) -> str:
|
133
|
+
"""Extract a zip file into the specified directory.
|
134
|
+
|
135
|
+
Args:
|
136
|
+
context_dir: Directory to extract the zip to.
|
137
|
+
|
138
|
+
Returns:
|
139
|
+
The extracted model directory.
|
140
|
+
"""
|
141
|
+
|
142
|
+
local_model_zip_path = os.path.join(context_dir, posixpath.basename(self.model_zip_stage_path))
|
143
|
+
if zipfile.is_zipfile(local_model_zip_path):
|
144
|
+
extracted_model_dir = os.path.join(context_dir, constants.MODEL_DIR)
|
145
|
+
with zipfile.ZipFile(local_model_zip_path, "r") as model_zip:
|
146
|
+
if len(model_zip.namelist()) > 1:
|
147
|
+
model_zip.extractall(extracted_model_dir)
|
148
|
+
conda_path = os.path.join(extracted_model_dir, "env", "conda.yaml")
|
149
|
+
|
150
|
+
def remove_snowml_from_conda() -> None:
|
151
|
+
with open(conda_path, encoding="utf-8") as file:
|
152
|
+
conda_yaml = yaml.safe_load(file)
|
153
|
+
|
154
|
+
dependencies = conda_yaml["dependencies"]
|
155
|
+
dependencies = [dep for dep in dependencies if not dep.startswith("snowflake-ml-python")]
|
156
|
+
|
157
|
+
conda_yaml["dependencies"] = dependencies
|
158
|
+
|
159
|
+
with open(conda_path, "w", encoding="utf-8") as file:
|
160
|
+
yaml.dump(conda_yaml, file)
|
161
|
+
|
162
|
+
# TODO(shchen): Remove once SNOW-840411 is landed.
|
163
|
+
remove_snowml_from_conda()
|
164
|
+
return extracted_model_dir
|
165
|
+
|
166
|
+
def _build(self, docker_config_dir: str) -> None:
    """Assemble a temporary Docker context for the model and build the image from it.

    Args:
        docker_config_dir: Path to docker configuration directory, which stores the temporary session token.
    """
    with tempfile.TemporaryDirectory() as build_context_dir:
        # The model zip was already uploaded to stage by the model registry log_model step; fetch it
        # because the conda and requirement files needed for the build live inside that zip.
        self.session.file.get(self.model_zip_stage_path, build_context_dir)
        model_dir = self._extract_model_zip(build_context_dir)

        docker_context.DockerContext(
            context_dir=build_context_dir, model_dir=model_dir, use_gpu=self.use_gpu
        ).build()

        # The image must be built before the temporary context directory is cleaned up.
        self._build_image_from_context(context_dir=build_context_dir, docker_config_dir=docker_config_dir)
|
185
|
+
|
186
|
+
def _run_docker_commands(self, commands: List[str]) -> None:
|
187
|
+
"""Run docker commands in a new child process.
|
188
|
+
|
189
|
+
Args:
|
190
|
+
commands: List of commands to run.
|
191
|
+
|
192
|
+
Raises:
|
193
|
+
RuntimeError: Occurs when docker commands failed to execute.
|
194
|
+
"""
|
195
|
+
proc = subprocess.Popen(commands, cwd=os.getcwd(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
196
|
+
output_lines = []
|
197
|
+
|
198
|
+
if proc.stdout:
|
199
|
+
for line in iter(proc.stdout.readline, ""):
|
200
|
+
output_lines.append(line)
|
201
|
+
logging.info(line)
|
202
|
+
|
203
|
+
if proc.wait():
|
204
|
+
raise RuntimeError(f"Docker build failed: {''.join(output_lines)}")
|
205
|
+
|
206
|
+
def _build_image_from_context(
    self, context_dir: str, docker_config_dir: str, *, platform: Platform = Platform.LINUX_AMD64
) -> None:
    """Run "docker buildx build" against the given context and tag the result with ``self.image_tag``.

    Args:
        context_dir: Path to context directory.
        docker_config_dir: Path to docker configuration directory, which stores the temporary session token.
        platform: Target platform for the build output, in the format "os[/arch[/variant]]".
    """
    # Global docker options come first, followed by the buildx sub-command and its arguments.
    docker_prefix = ["docker", "--config", docker_config_dir]
    build_args = ["buildx", "build", "--platform", platform.value, "--tag", f"{self.image_tag}", context_dir]

    self._run_docker_commands(docker_prefix + build_args)
|
231
|
+
|
232
|
+
def _upload(self, docker_config_dir: str) -> None:
|
233
|
+
"""
|
234
|
+
Uploads image to the image registry. This process requires a "docker login" followed by a "docker push". Remove
|
235
|
+
local image at the end of the upload operation to save up local space. Image cache is kept for more performant
|
236
|
+
built experience at the cost of small storage footprint.
|
237
|
+
|
238
|
+
For image registry authentication, we will use a session token obtained from the Snowpark session object.
|
239
|
+
The token authentication mechanism is automatically used when the username is set to "0sessiontoken" according
|
240
|
+
to the registry implementation detailed in the following link:
|
241
|
+
https://github.com/snowflakedb/snowflake-image-registry/blob/277435c6fd79db2df9f863aa9d04dc875e034d85
|
242
|
+
/AuthAdapter/src/main/java/com/snowflake/registry/service/AuthHeader.java#L122
|
243
|
+
|
244
|
+
By default, Docker overwrites the local Docker config file "/.docker/config.json" whenever a docker login
|
245
|
+
occurs. However, to ensure better isolation between Snowflake-managed Docker credentials and the user's own
|
246
|
+
Docker credentials, we will not use the default Docker config. Instead, we will write the username and session
|
247
|
+
token to a temporary file and use "docker --config" so that it only applies to the specific Docker command being
|
248
|
+
executed, without affecting the user's local Docker setup. The credential file will be automatically removed
|
249
|
+
at the end of upload operation.
|
250
|
+
|
251
|
+
Args:
|
252
|
+
docker_config_dir: Path to docker configuration directory, which stores the temporary session token.
|
253
|
+
"""
|
254
|
+
commands = ["docker", "--config", docker_config_dir, "login", self.image_tag]
|
255
|
+
self._run_docker_commands(commands)
|
256
|
+
|
257
|
+
logging.info(f"Pushing image to image repo {self.image_tag}")
|
258
|
+
commands = ["docker", "--config", docker_config_dir, "push", self.image_tag]
|
259
|
+
self._run_docker_commands(commands)
|