snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +35 -40
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/identifier.py +74 -7
  5. snowflake/ml/_internal/utils/uri.py +7 -2
  6. snowflake/ml/model/_core_requirements.py +1 -1
  7. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  8. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  9. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  10. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  11. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  12. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  14. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  15. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  16. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  17. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  18. snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
  19. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
  20. snowflake/ml/model/_deployer.py +14 -27
  21. snowflake/ml/model/_env.py +4 -4
  22. snowflake/ml/model/_handlers/_base.py +3 -1
  23. snowflake/ml/model/_handlers/custom.py +14 -2
  24. snowflake/ml/model/_handlers/pytorch.py +186 -0
  25. snowflake/ml/model/_handlers/sklearn.py +14 -8
  26. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  27. snowflake/ml/model/_handlers/torchscript.py +180 -0
  28. snowflake/ml/model/_handlers/xgboost.py +19 -9
  29. snowflake/ml/model/_model.py +27 -21
  30. snowflake/ml/model/_model_meta.py +33 -19
  31. snowflake/ml/model/model_signature.py +446 -66
  32. snowflake/ml/model/type_hints.py +28 -15
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
  36. snowflake/ml/modeling/cluster/birch.py +79 -43
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
  38. snowflake/ml/modeling/cluster/dbscan.py +79 -43
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
  40. snowflake/ml/modeling/cluster/k_means.py +79 -43
  41. snowflake/ml/modeling/cluster/mean_shift.py +79 -43
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
  43. snowflake/ml/modeling/cluster/optics.py +79 -43
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
  47. snowflake/ml/modeling/compose/column_transformer.py +79 -43
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
  54. snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
  55. snowflake/ml/modeling/covariance/oas.py +79 -43
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
  59. snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
  64. snowflake/ml/modeling/decomposition/pca.py +79 -43
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
  95. snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
  96. snowflake/ml/modeling/impute/knn_imputer.py +79 -43
  97. snowflake/ml/modeling/impute/missing_indicator.py +79 -43
  98. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
  99. snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
  100. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
  101. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
  102. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
  103. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
  104. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
  105. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
  106. snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
  107. snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
  108. snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
  109. snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
  110. snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
  111. snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
  112. snowflake/ml/modeling/linear_model/lars.py +79 -43
  113. snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
  114. snowflake/ml/modeling/linear_model/lasso.py +79 -43
  115. snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
  116. snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
  117. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
  118. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
  119. snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
  120. snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
  121. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
  123. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
  124. snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
  125. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
  126. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
  127. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
  128. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
  129. snowflake/ml/modeling/linear_model/perceptron.py +79 -43
  130. snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
  131. snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
  132. snowflake/ml/modeling/linear_model/ridge.py +79 -43
  133. snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
  134. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
  135. snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
  136. snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
  137. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
  138. snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
  139. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
  140. snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
  141. snowflake/ml/modeling/manifold/isomap.py +79 -43
  142. snowflake/ml/modeling/manifold/mds.py +79 -43
  143. snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
  144. snowflake/ml/modeling/manifold/tsne.py +79 -43
  145. snowflake/ml/modeling/metrics/classification.py +6 -1
  146. snowflake/ml/modeling/metrics/regression.py +517 -9
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
  161. snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
  171. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  172. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  173. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  174. snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
  177. snowflake/ml/modeling/svm/linear_svc.py +79 -43
  178. snowflake/ml/modeling/svm/linear_svr.py +79 -43
  179. snowflake/ml/modeling/svm/nu_svc.py +79 -43
  180. snowflake/ml/modeling/svm/nu_svr.py +79 -43
  181. snowflake/ml/modeling/svm/svc.py +79 -43
  182. snowflake/ml/modeling/svm/svr.py +79 -43
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
  191. snowflake/ml/registry/model_registry.py +123 -121
  192. snowflake/ml/version.py +1 -1
  193. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
  194. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  195. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  196. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -310,7 +310,8 @@ def validate_requirements_in_snowflake_conda_channel(
310
310
  FROM information_schema.packages
311
311
  WHERE ({pkg_names_str})
312
312
  AND language = 'python'
313
- AND runtime_version = '{parsed_python_version.major}.{parsed_python_version.minor}';
313
+ AND (runtime_version = '{parsed_python_version.major}.{parsed_python_version.minor}'
314
+ OR runtime_version is null);
314
315
  """
315
316
  )
316
317
  else:
@@ -1,15 +1,13 @@
1
1
  import contextlib
2
2
  import hashlib
3
- import importlib
4
3
  import io
5
4
  import os
6
5
  import pathlib
6
+ import pkgutil
7
7
  import shutil
8
8
  import tempfile
9
9
  import zipfile
10
- from typing import IO, Generator, Optional, Tuple, Union
11
-
12
- from snowflake.snowpark import session as snowpark_session
10
+ from typing import IO, Generator, List, Optional, Union
13
11
 
14
12
  GENERATED_PY_FILE_EXT = (".pyc", ".pyo", ".pyd", ".pyi")
15
13
 
@@ -61,10 +59,12 @@ def zip_file_or_directory_to_stream(
61
59
  Raises:
62
60
  FileNotFoundError: Raised when the given path does not exist.
63
61
  ValueError: Raised when the leading path is not an actual leading path of path
62
+ ValueError: Raised when the arcname cannot be encoded using ASCII.
64
63
 
65
64
  Yields:
66
65
  A bytes IO stream containing the zip file.
67
66
  """
67
+ # TODO(SNOW-862576): Should remove check on ASCII encoding after SNOW-862576 fixed.
68
68
  if not os.path.exists(path):
69
69
  raise FileNotFoundError(f"{path} is not found")
70
70
  if leading_path and not path.startswith(leading_path):
@@ -78,23 +78,35 @@ def zip_file_or_directory_to_stream(
78
78
  if os.path.realpath(path) != os.path.realpath(start_path):
79
79
  cur_path = os.path.dirname(path)
80
80
  while os.path.realpath(cur_path) != os.path.realpath(start_path):
81
- zf.writestr(f"{os.path.relpath(cur_path, start_path)}/", "")
81
+ arcname = os.path.relpath(cur_path, start_path)
82
+ if not _able_ascii_encode(arcname):
83
+ raise ValueError(f"File name {arcname} cannot be encoded using ASCII. Please rename.")
84
+ zf.write(cur_path, arcname)
82
85
  cur_path = os.path.dirname(cur_path)
83
86
 
84
87
  if os.path.isdir(path):
85
- for dirname, _, files in os.walk(path):
88
+ for dirpath, _, files in os.walk(path):
86
89
  # ignore __pycache__
87
- if ignore_generated_py_file and "__pycache__" in dirname:
90
+ if ignore_generated_py_file and "__pycache__" in dirpath:
88
91
  continue
89
- zf.write(dirname, os.path.relpath(dirname, start_path))
92
+ arcname = os.path.relpath(dirpath, start_path)
93
+ if not _able_ascii_encode(arcname):
94
+ raise ValueError(f"File name {arcname} cannot be encoded using ASCII. Please rename.")
95
+ zf.write(dirpath, arcname)
90
96
  for file in files:
91
97
  # ignore generated python files
92
98
  if ignore_generated_py_file and file.endswith(GENERATED_PY_FILE_EXT):
93
99
  continue
94
- filename = os.path.join(dirname, file)
95
- zf.write(filename, os.path.relpath(filename, start_path))
100
+ file_path = os.path.join(dirpath, file)
101
+ arcname = os.path.relpath(file_path, start_path)
102
+ if not _able_ascii_encode(arcname):
103
+ raise ValueError(f"File name {arcname} cannot be encoded using ASCII. Please rename.")
104
+ zf.write(file_path, arcname)
96
105
  else:
97
- zf.write(path, os.path.relpath(path, start_path))
106
+ arcname = os.path.relpath(path, start_path)
107
+ if not _able_ascii_encode(arcname):
108
+ raise ValueError(f"File name {arcname} cannot be encoded using ASCII. Please rename.")
109
+ zf.write(path, arcname)
98
110
 
99
111
  yield input_stream
100
112
 
@@ -116,19 +128,6 @@ def unzip_stream_in_temp_dir(stream: IO[bytes], temp_root: Optional[str] = None)
116
128
  yield tempdir
117
129
 
118
130
 
119
- @contextlib.contextmanager
120
- def zip_snowml() -> Generator[Tuple[io.BytesIO, str], None, None]:
121
- """Zip the snowflake-ml source code as a zip-file for import.
122
-
123
- Yields:
124
- A bytes IO stream containing the zip file.
125
- """
126
- snowml_path = list(importlib.import_module("snowflake.ml").__path__)[0]
127
- root_path = os.path.normpath(os.path.join(snowml_path, os.pardir, os.pardir))
128
- with zip_file_or_directory_to_stream(snowml_path, root_path) as stream:
129
- yield stream, hash_directory(snowml_path)
130
-
131
-
132
131
  def hash_directory(directory: Union[str, pathlib.Path]) -> str:
133
132
  """Hash the **content** of a folder recursively using SHA-1.
134
133
 
@@ -154,21 +153,17 @@ def hash_directory(directory: Union[str, pathlib.Path]) -> str:
154
153
  return _update_hash_from_dir(directory, hashlib.sha1()).hexdigest()
155
154
 
156
155
 
157
- def upload_snowml(session: snowpark_session.Session, stage_location: Optional[str] = None) -> str:
158
- """Upload the SnowML local code into a stage if provided, or a session stage.
159
- It will label the file name using the SHA-1 of the snowflake.ml folder, so that if the source code does not change,
160
- it won't reupload. Any changes will, however, result a new zip file.
156
+ def get_all_modules(dirname: str, prefix: str = "") -> List[pkgutil.ModuleInfo]:
157
+ subdirs = [f.path for f in os.scandir(dirname) if f.is_dir()]
158
+ modules = list(pkgutil.iter_modules(subdirs, prefix=prefix))
159
+ for dirname in subdirs:
160
+ modules.extend(get_all_modules(dirname, prefix=f"{prefix}.{dirname}" if prefix else dirname))
161
+ return modules
161
162
 
162
- Args:
163
- session: Snowpark connection session.
164
- stage_location: The path to the stage location where the uploaded SnowML should be. Defaults to None.
165
163
 
166
- Returns:
167
- The path to the uploaded SnowML zip file.
168
- """
169
- with zip_snowml() as (stream, hash_str):
170
- if stage_location is None:
171
- stage_location = session.get_session_stage()
172
- file_location = os.path.join(stage_location, f"snowml_{hash_str}.zip")
173
- session.file.put_stream(stream, stage_location=file_location, auto_compress=False, overwrite=False)
174
- return file_location
164
+ def _able_ascii_encode(s: str) -> bool:
165
+ try:
166
+ s.encode("ascii", errors="strict")
167
+ return True
168
+ except UnicodeEncodeError:
169
+ return False
@@ -6,7 +6,6 @@ import enum
6
6
  import functools
7
7
  import inspect
8
8
  import operator
9
- import threading
10
9
  import types
11
10
  from typing import (
12
11
  Any,
@@ -29,7 +28,6 @@ from snowflake.ml._internal import env
29
28
  from snowflake.snowpark import dataframe, exceptions, session
30
29
  from snowflake.snowpark._internal import utils
31
30
 
32
- _rlock = threading.RLock()
33
31
  _log_counter = 0
34
32
  _FLUSH_SIZE = 10
35
33
 
@@ -308,12 +306,11 @@ def send_api_usage_telemetry(
308
306
  return res
309
307
  finally:
310
308
  telemetry.send_function_usage_telemetry(**telemetry_args)
311
- with _rlock:
312
- global _log_counter
313
- _log_counter += 1
314
- if _log_counter >= _FLUSH_SIZE or "error" in telemetry_args:
315
- telemetry.send_batch()
316
- _log_counter = 0
309
+ global _log_counter
310
+ _log_counter += 1
311
+ if _log_counter >= _FLUSH_SIZE or "error" in telemetry_args:
312
+ telemetry.send_batch()
313
+ _log_counter = 0
317
314
 
318
315
  return cast(Callable[_Args, _ReturnValue], wrap)
319
316
 
@@ -4,14 +4,19 @@ from typing import Any, List, Optional, Tuple, Union, overload
4
4
  from snowflake.snowpark._internal.analyzer import analyzer_utils
5
5
 
6
6
  # Snowflake Identifier Regex. See https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html.
7
- _SF_UNQUOTED_IDENTIFIER = "[A-Za-z_][A-Za-z0-9_$]*"
7
+ _SF_UNQUOTED_CASE_INSENSITIVE_IDENTIFIER = "[A-Za-z_][A-Za-z0-9_$]*"
8
+ _SF_UNQUOTED_CASE_SENSITIVE_IDENTIFIER = "[A-Z_][A-Z0-9_$]*"
8
9
  SF_QUOTED_IDENTIFIER = '"(?:[^"]|"")*"'
9
- _SF_IDENTIFIER = f"({_SF_UNQUOTED_IDENTIFIER}|{SF_QUOTED_IDENTIFIER})"
10
+ _SF_IDENTIFIER = f"({_SF_UNQUOTED_CASE_INSENSITIVE_IDENTIFIER}|{SF_QUOTED_IDENTIFIER})"
10
11
  _SF_SCHEMA_LEVEL_OBJECT = rf"{_SF_IDENTIFIER}\.{_SF_IDENTIFIER}\.{_SF_IDENTIFIER}(.*)"
11
12
  _SF_SCHEMA_LEVEL_OBJECT_RE = re.compile(_SF_SCHEMA_LEVEL_OBJECT)
12
13
 
13
- UNQUOTED_CASE_INSENSITIVE_RE = re.compile(f"^({_SF_UNQUOTED_IDENTIFIER})$")
14
+ UNQUOTED_CASE_INSENSITIVE_RE = re.compile(f"^({_SF_UNQUOTED_CASE_INSENSITIVE_IDENTIFIER})$")
15
+ UNQUOTED_CASE_SENSITIVE_RE = re.compile(f"^({_SF_UNQUOTED_CASE_SENSITIVE_IDENTIFIER})$")
14
16
  QUOTED_IDENTIFIER_RE = re.compile(f"^({SF_QUOTED_IDENTIFIER})$")
17
+ DOUBLE_QUOTE = '"'
18
+
19
+ quote_name_without_upper_casing = analyzer_utils.quote_name_without_upper_casing
15
20
 
16
21
 
17
22
  def _is_quoted(id: str) -> bool:
@@ -61,10 +66,47 @@ def _get_unescaped_name(id: str) -> str:
61
66
  if not _is_quoted(id):
62
67
  return id.upper()
63
68
  unquoted_id = id[1:-1]
64
- return unquoted_id.replace('""', '"')
69
+ return unquoted_id.replace(DOUBLE_QUOTE + DOUBLE_QUOTE, DOUBLE_QUOTE)
65
70
 
66
71
 
67
- quote_name_without_upper_casing = analyzer_utils.quote_name_without_upper_casing
72
+ def _get_escaped_name(id: str) -> str:
73
+ """Add double quotes to escape quotes.
74
+ Replace each existing double quote with two double quotes.
75
+
76
+ NOTE: See note in :meth:`_is_quoted`.
77
+
78
+ Args:
79
+ id: The string to be checked & treated.
80
+
81
+ Returns:
82
+ The string with any existing double quotes doubled, wrapped in a pair of double quotes.
83
+ """
84
+ escape_quotes = id.replace(DOUBLE_QUOTE, DOUBLE_QUOTE + DOUBLE_QUOTE)
85
+ return DOUBLE_QUOTE + escape_quotes + DOUBLE_QUOTE
86
+
87
+
88
+ def get_inferred_name(id: str) -> str:
89
+ """Double quote id when it is case-sensitive and can start with and
90
+ contain any valid characters; unquote otherwise.
91
+
92
+ Examples:
93
+ COL1 -> COL1
94
+ 1COL -> "1COL"
95
+ Col -> "Col"
96
+ "COL" -> \"""COL""\" (ignore '\')
97
+ COL 1 -> "COL 1"
98
+
99
+ Args:
100
+ id: The string to be checked & treated.
101
+
102
+ Returns:
103
+ Double quoted identifier if necessary; unquoted string otherwise.
104
+ """
105
+ if UNQUOTED_CASE_SENSITIVE_RE.match(id):
106
+ return id
107
+ escaped_id = get_escaped_names(id)
108
+ assert isinstance(escaped_id, str)
109
+ return escaped_id
68
110
 
69
111
 
70
112
  def concat_names(ids: List[str]) -> str:
@@ -89,7 +131,7 @@ def concat_names(ids: List[str]) -> str:
89
131
  parts.append(id)
90
132
  final_id = "".join(parts)
91
133
  if quotes_needed:
92
- return quote_name_without_upper_casing(final_id)
134
+ return _get_escaped_name(final_id)
93
135
  return final_id
94
136
 
95
137
 
@@ -135,7 +177,7 @@ def get_unescaped_names(ids: List[str]) -> List[str]:
135
177
 
136
178
  def get_unescaped_names(ids: Optional[Union[str, List[str]]]) -> Optional[Union[str, List[str]]]:
137
179
  """Given a user provided identifier(s), this method will compute the equivalent column name identifier(s) in the
138
- response pandas dataframe(i.e., in the respones of snowpark_df.to_pandas()) using the rules defined here
180
+ response pandas dataframe(i.e., in the response of snowpark_df.to_pandas()) using the rules defined here
139
181
  https://docs.snowflake.com/en/sql-reference/identifiers-syntax.
140
182
 
141
183
  Args:
@@ -156,3 +198,28 @@ def get_unescaped_names(ids: Optional[Union[str, List[str]]]) -> Optional[Union[
156
198
  return _get_unescaped_name(ids)
157
199
  else:
158
200
  raise ValueError("Unsupported type. Only string or list of string are supported for selecting columns.")
201
+
202
+
203
+ def get_escaped_names(ids: Optional[Union[str, List[str]]]) -> Optional[Union[str, List[str]]]:
204
+ """Given a user provided identifier(s), this method will compute the equivalent column name identifier(s)
205
+ for column names that contain special characters, while maintaining case-sensitivity. See
206
+ https://docs.snowflake.com/en/sql-reference/identifiers-syntax.
207
+
208
+ Args:
209
+ ids: User provided column name identifier(s).
210
+
211
+ Returns:
212
+ Double-quoted Identifiers for column names, to make sure that column names are case sensitive
213
+
214
+ Raises:
215
+ ValueError: if the input type is unsupported or column name identifiers are invalid.
216
+ """
217
+
218
+ if ids is None:
219
+ return None
220
+ elif type(ids) is list:
221
+ return [_get_escaped_name(id) for id in ids]
222
+ elif type(ids) is str:
223
+ return _get_escaped_name(ids)
224
+ else:
225
+ raise ValueError("Unsupported type. Only string or list of string are supported for selecting columns.")
@@ -1,4 +1,4 @@
1
- import os.path
1
+ import posixpath
2
2
  from typing import Optional
3
3
  from urllib.parse import ParseResult, urlparse, urlunparse
4
4
 
@@ -35,7 +35,12 @@ def get_snowflake_stage_path_from_uri(uri: str) -> Optional[str]:
35
35
  if not is_snowflake_stage_uri(uri):
36
36
  return None
37
37
  uri_components = urlparse(uri)
38
- return os.path.join(uri_components.netloc.strip("/"), uri_components.path.strip("/")).rstrip("/")
38
+ # posixpath.join drops all preceding components when any argument is an absolute path.
39
+ # The URI path we get is absolute (it starts with '/'), but since it is appended to the stage location
40
+ # it must be treated as relative, so the leading '/' is stripped.
41
+ return posixpath.normpath(
42
+ posixpath.join(posixpath.normpath(uri_components.netloc), posixpath.normpath(uri_components.path.lstrip("/")))
43
+ )
39
44
 
40
45
 
41
46
  def get_uri_scheme(uri: str) -> str:
@@ -1 +1 @@
1
- REQUIREMENTS=['anyio>=3.5.0,<4', 'cloudpickle', 'numpy>=1.23,<2', 'packaging>=20.9,<24', 'pandas>=1.0.0,<2', 'pyyaml>=6.0,<7', 'scikit-learn>=1.2.1,<2', 'snowflake-snowpark-python>=1.4.0,<2', 'typing-extensions>=4.1.0,<5']
1
+ REQUIREMENTS=['anyio>=3.5.0,<4', 'cloudpickle', 'numpy>=1.23,<2', 'packaging>=20.9,<24', 'pandas>=1.0.0,<2', 'pyyaml>=6.0,<7', 'snowflake-snowpark-python>=1.4.0,<2', 'typing-extensions>=4.1.0,<5']
@@ -0,0 +1,15 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
+ class ImageBuilder(ABC):
5
+ """
6
+ Abstract class encapsulating image building and upload to model registry.
7
+ """
8
+
9
+ @abstractmethod
10
+ def build_and_upload_image(self) -> str:
11
+ """Builds and uploads an image to the model registry.
12
+
13
+ Returns: Full image path.
14
+ """
15
+ pass
@@ -0,0 +1,259 @@
1
+ import base64
2
+ import json
3
+ import logging
4
+ import os
5
+ import posixpath
6
+ import subprocess
7
+ import tempfile
8
+ import zipfile
9
+ from enum import Enum
10
+ from typing import List
11
+
12
+ import yaml
13
+
14
+ from snowflake import snowpark
15
+ from snowflake.ml._internal.utils import query_result_checker
16
+ from snowflake.ml.model._deploy_client.image_builds import (
17
+ base_image_builder,
18
+ docker_context,
19
+ )
20
+ from snowflake.ml.model._deploy_client.utils import constants
21
+
22
+
23
class Platform(Enum):
    """Target platforms that a Docker image can be built for."""

    # Platform string in "os/arch" form.
    LINUX_AMD64 = "linux/amd64"
25
+
26
+
27
class ClientImageBuilder(base_image_builder.ImageBuilder):
    """Client-side image building and upload to model registry.

    Usage requirements:
        Requires prior installation and running of Docker with BuildKit. See installation instructions in
        https://docs.docker.com/engine/install/
    """

    def __init__(
        self, *, id: str, image_repo: str, model_zip_stage_path: str, session: snowpark.Session, use_gpu: bool = False
    ) -> None:
        """Initialization

        Args:
            id: A hexadecimal string used for naming the image tag.
            image_repo: Path to image repository.
            model_zip_stage_path: Path to model zip file in stage.
            session: Snowpark session
            use_gpu: Boolean flag for generating the CPU or GPU base image.
        """
        # Tag layout is "<image_repo>/<id>:latest"; trailing slashes on the repo are dropped to avoid "//".
        self.image_tag = "/".join([image_repo.rstrip("/"), id]) + ":latest"
        self.image_repo = image_repo
        self.model_zip_stage_path = model_zip_stage_path
        self.use_gpu = use_gpu
        self.session = session

    def build_and_upload_image(self) -> str:
        """Builds and uploads an image to the model registry.

        The session's PYTHON_CONNECTOR_QUERY_RESULT_FORMAT parameter is switched to 'json' for the duration of the
        operation and restored afterwards, whether or not the build/upload succeeded. The locally built image is
        removed afterwards as well.

        Returns:
            Full image path (the image tag that was built and pushed).
        """

        def _setup_docker_config(docker_config_dir: str) -> None:
            """Set up a temporary docker config, which is used for running all docker commands.

            Args:
                docker_config_dir: Path to docker configuration directory, which stores the temporary session token.
            """
            ctx = self.session._conn._conn
            assert ctx._rest, "SnowflakeRestful is not set in session"
            token_data = ctx._rest._token_request("ISSUE")
            snowpark_session_token = token_data["data"]["sessionToken"]
            token_obj = {"token": snowpark_session_token}
            # "0sessiontoken" is the user name under which the session token is presented to the registry.
            credentials = f"0sessiontoken:{json.dumps(token_obj)}"
            encoded_credentials = base64.b64encode(credentials.encode("utf-8")).decode("utf-8")
            content = {"auths": {self.image_tag: {"auth": encoded_credentials}}}
            config_path = os.path.join(docker_config_dir, "config.json")
            with open(config_path, "w", encoding="utf-8") as file:
                json.dump(content, file)

        self.validate_docker_client_env()

        query_result = (
            query_result_checker.SqlResultValidator(
                self.session,
                query="SHOW PARAMETERS LIKE 'PYTHON_CONNECTOR_QUERY_RESULT_FORMAT' IN SESSION",
            )
            .has_dimensions(expected_rows=1)
            .validate()
        )
        prev_format = query_result[0].value

        with tempfile.TemporaryDirectory() as config_dir:
            succeeded = False
            try:
                # Workaround for SNOW-841699: Fail to authenticate to image registry with session token generated from
                # Snowpark. Need to temporarily set the json query format in order to process GS token response.
                self.session.sql("ALTER SESSION SET PYTHON_CONNECTOR_QUERY_RESULT_FORMAT = 'json'").collect()
                _setup_docker_config(config_dir)
                self._build(config_dir)
                self._upload(config_dir)
                succeeded = True
            finally:
                try:
                    self.session.sql(
                        f"ALTER SESSION SET PYTHON_CONNECTOR_QUERY_RESULT_FORMAT = '{prev_format}'"
                    ).collect()
                finally:
                    # Always attempt the local cleanup, even if restoring the session parameter failed. When the
                    # build/upload itself failed, a cleanup error must not replace the original exception.
                    self._remove_local_image(config_dir, ignore_errors=not succeeded)
        return self.image_tag

    def _remove_local_image(self, docker_config_dir: str, *, ignore_errors: bool = False) -> None:
        """Remove the locally built image via "docker rmi".

        Args:
            docker_config_dir: Path to docker configuration directory, which stores the temporary session token.
            ignore_errors: When True, a failing removal is logged instead of raised.

        Raises:
            RuntimeError: Occurs when the removal fails and ignore_errors is False.
        """
        commands = ["docker", "--config", docker_config_dir, "rmi", self.image_tag]
        logging.info(f"Removing local image: {self.image_tag}")
        try:
            self._run_docker_commands(commands)
        except (RuntimeError, OSError):
            if not ignore_errors:
                raise
            logging.warning(f"Failed to remove local image: {self.image_tag}")

    def validate_docker_client_env(self) -> None:
        """Ensure docker client is running and BuildKit is enabled. Note that Buildx always uses BuildKit.
        - Ensure docker daemon is running through the "docker info" command. When docker daemon is running,
        return code will be 0, else return code will be 1.
        - Ensure BuildKit is enabled by checking "docker buildx version".

        Raises:
            ConnectionError: Occurs when Docker is not installed or is not running.

        """
        info_command = ["docker", "info"]
        buildx_command = ["docker", "buildx", "version"]

        # OSError covers the case where the "docker" executable cannot be launched at all (e.g. not installed);
        # without a shell in between, that is reported as an exception rather than a non-zero return code.
        try:
            subprocess.check_call(info_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except (subprocess.CalledProcessError, OSError) as err:
            raise ConnectionError(
                "Failed to initialize Docker client. Please ensure Docker is installed and running."
            ) from err

        try:
            subprocess.check_call(buildx_command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except (subprocess.CalledProcessError, OSError) as err:
            raise ConnectionError(
                "Please ensured Docker is installed with BuildKit by following "
                "https://docs.docker.com/build/buildkit/#getting-started"
            ) from err

    @staticmethod
    def _remove_snowml_from_conda(conda_path: str) -> None:
        """Rewrite the conda environment file without any "snowflake-ml-python" dependency.

        Args:
            conda_path: Path to the conda.yaml file to rewrite in place.
        """
        with open(conda_path, encoding="utf-8") as file:
            conda_yaml = yaml.safe_load(file)

        # Dependency entries are not always strings (a "pip:" subsection is a mapping); only string entries can
        # name the package, so everything else is kept untouched.
        conda_yaml["dependencies"] = [
            dep
            for dep in conda_yaml["dependencies"]
            if not (isinstance(dep, str) and dep.startswith("snowflake-ml-python"))
        ]

        with open(conda_path, "w", encoding="utf-8") as file:
            yaml.dump(conda_yaml, file)

    def _extract_model_zip(self, context_dir: str) -> str:
        """Extract a zip file into the specified directory.

        The zip is expected at "<context_dir>/<basename of model_zip_stage_path>".

        Args:
            context_dir: Directory to extract the zip to.

        Returns:
            The extracted model directory.

        Raises:
            ValueError: Occurs when the expected file is missing or is not a valid zip file.
        """
        local_model_zip_path = os.path.join(context_dir, posixpath.basename(self.model_zip_stage_path))
        if not zipfile.is_zipfile(local_model_zip_path):
            raise ValueError(f"{local_model_zip_path} is not a valid zip file.")

        extracted_model_dir = os.path.join(context_dir, constants.MODEL_DIR)
        with zipfile.ZipFile(local_model_zip_path, "r") as model_zip:
            if len(model_zip.namelist()) > 1:
                model_zip.extractall(extracted_model_dir)

        # TODO(shchen): Remove once SNOW-840411 is landed.
        self._remove_snowml_from_conda(os.path.join(extracted_model_dir, "env", "conda.yaml"))
        return extracted_model_dir

    def _build(self, docker_config_dir: str) -> None:
        """Constructs the Docker context directory and then builds a Docker image based on that context.

        Args:
            docker_config_dir: Path to docker configuration directory, which stores the temporary session token.
        """

        with tempfile.TemporaryDirectory() as context_dir:
            # Download the model zip file that is already uploaded to stage during model registry log_model step.
            # This is needed in order to obtain the conda and requirement file inside the model zip.
            self.session.file.get(self.model_zip_stage_path, context_dir)

            extracted_model_dir = self._extract_model_zip(context_dir)

            dc = docker_context.DockerContext(
                context_dir=context_dir, model_dir=extracted_model_dir, use_gpu=self.use_gpu
            )
            dc.build()
            self._build_image_from_context(context_dir=context_dir, docker_config_dir=docker_config_dir)

    def _run_docker_commands(self, commands: List[str]) -> None:
        """Run docker commands in a new child process.

        The child's stdout and stderr are merged, logged line by line, and included in the error on failure.

        Args:
            commands: List of commands to run.

        Raises:
            RuntimeError: Occurs when docker commands failed to execute.
        """
        output_lines: List[str] = []

        # The context manager closes the output pipe and reaps the child even if reading/logging raises.
        with subprocess.Popen(
            commands, cwd=os.getcwd(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
        ) as proc:
            if proc.stdout:
                for line in iter(proc.stdout.readline, ""):
                    output_lines.append(line)
                    logging.info(line)
            return_code = proc.wait()

        if return_code:
            raise RuntimeError(f"Docker build failed: {''.join(output_lines)}")

    def _build_image_from_context(
        self, context_dir: str, docker_config_dir: str, *, platform: Platform = Platform.LINUX_AMD64
    ) -> None:
        """Builds a Docker image based on provided context.

        Args:
            context_dir: Path to context directory.
            docker_config_dir: Path to docker configuration directory, which stores the temporary session token.
            platform: Target platform for the build output, in the format "os[/arch[/variant]]".
        """

        commands = [
            "docker",
            "--config",
            docker_config_dir,
            "buildx",
            "build",
            "--platform",
            platform.value,
            "--tag",
            self.image_tag,
            context_dir,
        ]

        self._run_docker_commands(commands)

    def _upload(self, docker_config_dir: str) -> None:
        """
        Uploads image to the image registry. This process requires a "docker login" followed by a "docker push".
        Removal of the local image is not done here; it is handled by build_and_upload_image once the upload
        has finished.

        For image registry authentication, we will use a session token obtained from the Snowpark session object.
        The token authentication mechanism is automatically used when the username is set to "0sessiontoken" according
        to the registry implementation detailed in the following link:
        https://github.com/snowflakedb/snowflake-image-registry/blob/277435c6fd79db2df9f863aa9d04dc875e034d85
        /AuthAdapter/src/main/java/com/snowflake/registry/service/AuthHeader.java#L122

        By default, Docker overwrites the local Docker config file "/.docker/config.json" whenever a docker login
        occurs. However, to ensure better isolation between Snowflake-managed Docker credentials and the user's own
        Docker credentials, we will not use the default Docker config. Instead, the username and session token are
        written to a temporary config directory and passed with "docker --config" so that they only apply to the
        specific Docker command being executed, without affecting the user's local Docker setup.

        Args:
            docker_config_dir: Path to docker configuration directory, which stores the temporary session token.
        """
        commands = ["docker", "--config", docker_config_dir, "login", self.image_tag]
        self._run_docker_commands(commands)

        logging.info(f"Pushing image to image repo {self.image_tag}")
        commands = ["docker", "--config", docker_config_dir, "push", self.image_tag]
        self._run_docker_commands(commands)