snowflake-ml-python 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. snowflake/cortex/_complete.py +1 -1
  2. snowflake/cortex/_extract_answer.py +1 -1
  3. snowflake/cortex/_sentiment.py +1 -1
  4. snowflake/cortex/_summarize.py +1 -1
  5. snowflake/cortex/_translate.py +1 -1
  6. snowflake/ml/_internal/env_utils.py +68 -6
  7. snowflake/ml/_internal/file_utils.py +34 -4
  8. snowflake/ml/_internal/telemetry.py +79 -91
  9. snowflake/ml/_internal/utils/identifier.py +78 -72
  10. snowflake/ml/_internal/utils/retryable_http.py +16 -4
  11. snowflake/ml/_internal/utils/spcs_attribution_utils.py +122 -0
  12. snowflake/ml/dataset/dataset.py +1 -1
  13. snowflake/ml/model/_api.py +21 -14
  14. snowflake/ml/model/_client/model/model_impl.py +176 -0
  15. snowflake/ml/model/_client/model/model_method_info.py +19 -0
  16. snowflake/ml/model/_client/model/model_version_impl.py +291 -0
  17. snowflake/ml/model/_client/ops/metadata_ops.py +107 -0
  18. snowflake/ml/model/_client/ops/model_ops.py +308 -0
  19. snowflake/ml/model/_client/sql/model.py +75 -0
  20. snowflake/ml/model/_client/sql/model_version.py +213 -0
  21. snowflake/ml/model/_client/sql/stage.py +40 -0
  22. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -4
  23. snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +24 -8
  24. snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +23 -0
  25. snowflake/ml/model/_deploy_client/snowservice/deploy.py +14 -2
  26. snowflake/ml/model/_deploy_client/utils/constants.py +1 -0
  27. snowflake/ml/model/_deploy_client/warehouse/deploy.py +2 -2
  28. snowflake/ml/model/_model_composer/model_composer.py +31 -9
  29. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +25 -10
  30. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -2
  31. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +2 -1
  32. snowflake/ml/model/_model_composer/model_method/model_method.py +34 -3
  33. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +1 -1
  34. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +3 -1
  35. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +10 -28
  36. snowflake/ml/model/_packager/model_meta/model_meta.py +18 -16
  37. snowflake/ml/model/_signatures/snowpark_handler.py +1 -1
  38. snowflake/ml/model/model_signature.py +108 -53
  39. snowflake/ml/model/type_hints.py +1 -0
  40. snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +554 -0
  41. snowflake/ml/modeling/_internal/estimator_protocols.py +1 -60
  42. snowflake/ml/modeling/_internal/model_specifications.py +146 -0
  43. snowflake/ml/modeling/_internal/model_trainer.py +13 -0
  44. snowflake/ml/modeling/_internal/model_trainer_builder.py +78 -0
  45. snowflake/ml/modeling/_internal/pandas_trainer.py +54 -0
  46. snowflake/ml/modeling/_internal/snowpark_handlers.py +6 -760
  47. snowflake/ml/modeling/_internal/snowpark_trainer.py +331 -0
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +108 -135
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +106 -135
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +106 -135
  51. snowflake/ml/modeling/cluster/birch.py +106 -135
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +106 -135
  53. snowflake/ml/modeling/cluster/dbscan.py +106 -135
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +106 -135
  55. snowflake/ml/modeling/cluster/k_means.py +105 -135
  56. snowflake/ml/modeling/cluster/mean_shift.py +106 -135
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +105 -135
  58. snowflake/ml/modeling/cluster/optics.py +106 -135
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +106 -135
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +106 -135
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +106 -135
  62. snowflake/ml/modeling/compose/column_transformer.py +106 -135
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +108 -135
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +106 -135
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +99 -128
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +106 -135
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +106 -135
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +104 -133
  69. snowflake/ml/modeling/covariance/min_cov_det.py +106 -135
  70. snowflake/ml/modeling/covariance/oas.py +99 -128
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +103 -132
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +106 -135
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +106 -135
  74. snowflake/ml/modeling/decomposition/fast_ica.py +106 -135
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +106 -135
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +106 -135
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +106 -135
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +106 -135
  79. snowflake/ml/modeling/decomposition/pca.py +106 -135
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +106 -135
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +106 -135
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +108 -135
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +108 -135
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +108 -135
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +108 -135
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +108 -135
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +108 -135
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +108 -135
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +108 -135
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +108 -135
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +108 -135
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +108 -135
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +108 -135
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +106 -135
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +108 -135
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +108 -135
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +108 -135
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +108 -135
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +108 -135
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +101 -128
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +99 -126
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +99 -126
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +99 -126
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +100 -127
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +99 -126
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +106 -135
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +95 -124
  108. snowflake/ml/modeling/framework/base.py +83 -1
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +108 -135
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +108 -135
  111. snowflake/ml/modeling/impute/iterative_imputer.py +106 -135
  112. snowflake/ml/modeling/impute/knn_imputer.py +106 -135
  113. snowflake/ml/modeling/impute/missing_indicator.py +106 -135
  114. snowflake/ml/modeling/impute/simple_imputer.py +9 -1
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +96 -125
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +106 -135
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +106 -135
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +105 -134
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +103 -132
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +108 -135
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +90 -118
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +90 -118
  123. snowflake/ml/modeling/linear_model/ard_regression.py +108 -135
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +108 -135
  125. snowflake/ml/modeling/linear_model/elastic_net.py +108 -135
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +108 -135
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +108 -135
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +108 -135
  129. snowflake/ml/modeling/linear_model/lars.py +108 -135
  130. snowflake/ml/modeling/linear_model/lars_cv.py +108 -135
  131. snowflake/ml/modeling/linear_model/lasso.py +108 -135
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +108 -135
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +108 -135
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +108 -135
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +108 -135
  136. snowflake/ml/modeling/linear_model/linear_regression.py +108 -135
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +108 -135
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +108 -135
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +108 -135
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +108 -135
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +108 -135
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +108 -135
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +108 -135
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +108 -135
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +107 -135
  146. snowflake/ml/modeling/linear_model/perceptron.py +107 -135
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +108 -135
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +108 -135
  149. snowflake/ml/modeling/linear_model/ridge.py +108 -135
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +108 -135
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +108 -135
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +108 -135
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +108 -135
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +106 -135
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +108 -135
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +108 -135
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +108 -135
  158. snowflake/ml/modeling/manifold/isomap.py +106 -135
  159. snowflake/ml/modeling/manifold/mds.py +106 -135
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +106 -135
  161. snowflake/ml/modeling/manifold/tsne.py +106 -135
  162. snowflake/ml/modeling/metrics/classification.py +196 -55
  163. snowflake/ml/modeling/metrics/correlation.py +4 -2
  164. snowflake/ml/modeling/metrics/covariance.py +7 -4
  165. snowflake/ml/modeling/metrics/ranking.py +32 -16
  166. snowflake/ml/modeling/metrics/regression.py +60 -32
  167. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +106 -135
  168. snowflake/ml/modeling/mixture/gaussian_mixture.py +106 -135
  169. snowflake/ml/modeling/model_selection/grid_search_cv.py +91 -148
  170. snowflake/ml/modeling/model_selection/randomized_search_cv.py +93 -154
  171. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +105 -132
  172. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +108 -135
  173. snowflake/ml/modeling/multiclass/output_code_classifier.py +108 -135
  174. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +108 -135
  175. snowflake/ml/modeling/naive_bayes/categorical_nb.py +108 -135
  176. snowflake/ml/modeling/naive_bayes/complement_nb.py +108 -135
  177. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +98 -125
  178. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +107 -134
  179. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +108 -135
  180. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +108 -135
  181. snowflake/ml/modeling/neighbors/kernel_density.py +106 -135
  182. snowflake/ml/modeling/neighbors/local_outlier_factor.py +106 -135
  183. snowflake/ml/modeling/neighbors/nearest_centroid.py +108 -135
  184. snowflake/ml/modeling/neighbors/nearest_neighbors.py +106 -135
  185. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +108 -135
  186. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +108 -135
  187. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +108 -135
  188. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +106 -135
  189. snowflake/ml/modeling/neural_network/mlp_classifier.py +108 -135
  190. snowflake/ml/modeling/neural_network/mlp_regressor.py +108 -135
  191. snowflake/ml/modeling/parameters/disable_distributed_hpo.py +2 -6
  192. snowflake/ml/modeling/preprocessing/binarizer.py +25 -8
  193. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +9 -4
  194. snowflake/ml/modeling/preprocessing/label_encoder.py +31 -11
  195. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +27 -9
  196. snowflake/ml/modeling/preprocessing/min_max_scaler.py +42 -14
  197. snowflake/ml/modeling/preprocessing/normalizer.py +9 -4
  198. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +26 -10
  199. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +37 -13
  200. snowflake/ml/modeling/preprocessing/polynomial_features.py +106 -135
  201. snowflake/ml/modeling/preprocessing/robust_scaler.py +39 -13
  202. snowflake/ml/modeling/preprocessing/standard_scaler.py +36 -12
  203. snowflake/ml/modeling/semi_supervised/label_propagation.py +108 -135
  204. snowflake/ml/modeling/semi_supervised/label_spreading.py +108 -135
  205. snowflake/ml/modeling/svm/linear_svc.py +108 -135
  206. snowflake/ml/modeling/svm/linear_svr.py +108 -135
  207. snowflake/ml/modeling/svm/nu_svc.py +108 -135
  208. snowflake/ml/modeling/svm/nu_svr.py +108 -135
  209. snowflake/ml/modeling/svm/svc.py +108 -135
  210. snowflake/ml/modeling/svm/svr.py +108 -135
  211. snowflake/ml/modeling/tree/decision_tree_classifier.py +108 -135
  212. snowflake/ml/modeling/tree/decision_tree_regressor.py +108 -135
  213. snowflake/ml/modeling/tree/extra_tree_classifier.py +108 -135
  214. snowflake/ml/modeling/tree/extra_tree_regressor.py +108 -135
  215. snowflake/ml/modeling/xgboost/xgb_classifier.py +108 -136
  216. snowflake/ml/modeling/xgboost/xgb_regressor.py +108 -136
  217. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +108 -136
  218. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +108 -136
  219. snowflake/ml/registry/model_registry.py +2 -0
  220. snowflake/ml/registry/registry.py +215 -0
  221. snowflake/ml/version.py +1 -1
  222. {snowflake_ml_python-1.1.0.dist-info → snowflake_ml_python-1.1.2.dist-info}/METADATA +34 -1
  223. snowflake_ml_python-1.1.2.dist-info/RECORD +347 -0
  224. snowflake_ml_python-1.1.0.dist-info/RECORD +0 -331
  225. {snowflake_ml_python-1.1.0.dist-info → snowflake_ml_python-1.1.2.dist-info}/WHEEL +0 -0
@@ -1,22 +1,38 @@
1
1
  spec:
2
2
  container:
3
- - name: $container_name
4
- image: $base_image
3
+ - name: "${container_name}"
4
+ image: "${base_image}"
5
5
  command:
6
6
  - sh
7
7
  args:
8
8
  - -c
9
- - >-
10
- while [ ! -f "$script_path" ]; do echo "File not found: $script_path"; sleep 1; done;
11
- chmod +x $script_path;
12
- sh $script_path;
9
+ - |
10
+ wait_for_file() {
11
+ file_path="$1"
12
+ timeout="$2"
13
+ elapsed_time=0
14
+ while [ ! -f "${file_path}" ]; do
15
+ if [ "${elapsed_time}" -ge "${timeout}" ]; then
16
+ echo "Error: ${file_path} not found within ${timeout} seconds. Exiting."
17
+ exit 1
18
+ fi
19
+ elapsed_time=$((elapsed_time + 1))
20
+ remaining_time=$((timeout - elapsed_time))
21
+ echo "Awaiting the mounting of ${file_path}. Wait time remaining: ${remaining_time} seconds"
22
+ sleep 1
23
+ done
24
+ }
25
+ wait_for_file "${script_path}" 300
26
+ wait_for_file "${mounted_token_path}" 300
27
+ chmod +x "${script_path}"
28
+ sh "${script_path}"
13
29
  volumeMounts:
14
30
  - name: vol1
15
31
  mountPath: /local/user/vol1
16
32
  - name: stagemount
17
- mountPath: /$stage
33
+ mountPath: "/${stage}"
18
34
  volume:
19
35
  - name: vol1
20
36
  source: local # only local emptyDir volume is supported
21
37
  - name: stagemount
22
- source: "@$stage"
38
+ source: "@${stage}"
@@ -11,18 +11,41 @@ cleanup() {
11
11
  kill -- -$$$ # Kill the entire process group. Extra $ to escape, the generated shell script should have two $.
12
12
  }
13
13
 
14
+ # SNOW-990976, This is an additional safety check to ensure token file exists, on top of the token file check upon
15
+ # launching SPCS job. This additional check could provide value in cases things go wrong with token refresh that result
16
+ # in token file to disappear.
17
+ wait_till_token_file_exists() {
18
+ timeout=60 # 1 minute timeout
19
+ elapsed_time=0
20
+
21
+ while [ ! -f "${SESSION_TOKEN_PATH}" ] && [ "$elapsed_time" -lt "$timeout" ]; do
22
+ sleep 1
23
+ elapsed_time=$((elapsed_time + 1))
24
+ remaining_time=$((timeout - elapsed_time))
25
+ echo "Waiting for token file to exist. Wait time remaining: ${remaining_time} seconds."
26
+ done
27
+
28
+ if [ ! -f "${SESSION_TOKEN_PATH}" ]; then
29
+ echo "Error: Token file '${SESSION_TOKEN_PATH}' does not show up within the ${timeout} seconds timeout period."
30
+ exit 1
31
+ fi
32
+ }
33
+
14
34
  generate_registry_cred() {
35
+ wait_till_token_file_exists
15
36
  AUTH_TOKEN=$(printf '0auth2accesstoken:%s' "$(cat ${SESSION_TOKEN_PATH})" | base64);
16
37
  echo '{"auths":{"$image_repo":{"auth":"'"$AUTH_TOKEN"'"}}}' | tr -d '\n' > $REGISTRY_CRED_PATH;
17
38
  }
18
39
 
19
40
  on_session_token_change() {
41
+ wait_till_token_file_exists
20
42
  # Get the initial checksum of the file
21
43
  CHECKSUM=$(md5sum "${SESSION_TOKEN_PATH}" | awk '{ print $1 }')
22
44
  # Run the command once before the loop
23
45
  echo "Monitoring session token changes in the background..."
24
46
  (
25
47
  while true; do
48
+ wait_till_token_file_exists
26
49
  # Get the current checksum of the file
27
50
  CURRENT_CHECKSUM=$(md5sum "${SESSION_TOKEN_PATH}" | awk '{ print $1 }')
28
51
  if [ "${CURRENT_CHECKSUM}" != "${CHECKSUM}" ]; then
@@ -10,14 +10,19 @@ from typing import Any, Dict, Generator, Optional, cast
10
10
 
11
11
  import importlib_resources
12
12
  import yaml
13
+ from packaging import requirements
13
14
  from typing_extensions import Unpack
14
15
 
15
- from snowflake.ml._internal import file_utils
16
+ from snowflake.ml._internal import env_utils, file_utils
16
17
  from snowflake.ml._internal.exceptions import (
17
18
  error_codes,
18
19
  exceptions as snowml_exceptions,
19
20
  )
20
- from snowflake.ml._internal.utils import identifier, query_result_checker
21
+ from snowflake.ml._internal.utils import (
22
+ identifier,
23
+ query_result_checker,
24
+ spcs_attribution_utils,
25
+ )
21
26
  from snowflake.ml.model import type_hints
22
27
  from snowflake.ml.model._deploy_client import snowservice
23
28
  from snowflake.ml.model._deploy_client.image_builds import (
@@ -161,6 +166,11 @@ def _deploy(
161
166
  # Set conda-forge as backup channel for SPCS deployment
162
167
  if "conda-forge" not in model_meta_deploy.env._conda_dependencies:
163
168
  model_meta_deploy.env._conda_dependencies["conda-forge"] = []
169
+ # Snowflake connector needs pyarrow to work correctly.
170
+ env_utils.append_conda_dependency(
171
+ model_meta_deploy.env._conda_dependencies,
172
+ (env_utils.DEFAULT_CHANNEL_NAME, requirements.Requirement("pyarrow")),
173
+ )
164
174
  if options.use_gpu:
165
175
  # Make mypy happy
166
176
  assert options.num_gpus is not None
@@ -585,6 +595,8 @@ class SnowServiceDeployment:
585
595
  )
586
596
  logger.info(f"Service {self._service_name} is ready. Creating service function...")
587
597
 
598
+ spcs_attribution_utils.record_service_start(self.session, self._service_name)
599
+
588
600
  service_function_sql = client.create_or_replace_service_function(
589
601
  service_func_name=self.service_func_name,
590
602
  service_name=self._service_name,
@@ -50,3 +50,4 @@ KANIKO_SHELL_SCRIPT_NAME = "kaniko_shell_script_fixture.sh"
50
50
  KANIKO_CONTAINER_NAME = "kaniko"
51
51
  LATEST_IMAGE_TAG = "latest"
52
52
  KANIKO_IMAGE = "kaniko-project/executor:v1.16.0-debug"
53
+ SPCS_MOUNTED_TOKEN_PATH = "/snowflake/session/token"
@@ -173,7 +173,7 @@ def _get_model_final_packages(
173
173
  else:
174
174
  required_packages = meta.env._conda_dependencies[env_utils.DEFAULT_CHANNEL_NAME]
175
175
 
176
- final_packages = env_utils.validate_requirements_in_snowflake_conda_channel(
176
+ final_packages = env_utils.validate_requirements_in_information_schema(
177
177
  session, required_packages, python_version=meta.env.python_version
178
178
  )
179
179
 
@@ -182,7 +182,7 @@ def _get_model_final_packages(
182
182
  raise snowml_exceptions.SnowflakeMLException(
183
183
  error_code=error_codes.DEPENDENCY_VERSION_ERROR,
184
184
  original_exception=RuntimeError(
185
- "The model's dependencyies are not available in Snowflake Anaconda Channel. "
185
+ "The model's dependencies are not available in Snowflake Anaconda Channel. "
186
186
  + relax_version_info_str
187
187
  + "Required packages are:\n"
188
188
  + " ".join(map(lambda x: f'"{x}"', required_packages))
@@ -3,7 +3,7 @@ import pathlib
3
3
  import tempfile
4
4
  import zipfile
5
5
  from types import ModuleType
6
- from typing import Dict, List, Optional
6
+ from typing import Any, Dict, List, Optional
7
7
 
8
8
  from absl import logging
9
9
  from packaging import requirements
@@ -32,8 +32,15 @@ class ModelComposer:
32
32
  """
33
33
 
34
34
  MODEL_FILE_REL_PATH = "model.zip"
35
+ MODEL_DIR_REL_PATH = "model"
35
36
 
36
- def __init__(self, session: Session, stage_path: str) -> None:
37
+ def __init__(
38
+ self,
39
+ session: Session,
40
+ stage_path: str,
41
+ *,
42
+ statement_params: Optional[Dict[str, Any]] = None,
43
+ ) -> None:
37
44
  self.session = session
38
45
  self.stage_path = pathlib.PurePosixPath(stage_path)
39
46
 
@@ -43,6 +50,8 @@ class ModelComposer:
43
50
  self.packager = model_packager.ModelPackager(local_dir_path=str(self._packager_workspace_path))
44
51
  self.manifest = model_manifest.ModelManifest(workspace_path=self.workspace_path)
45
52
 
53
+ self._statement_params = statement_params
54
+
46
55
  def __del__(self) -> None:
47
56
  self._workspace.cleanup()
48
57
  self._packager_workspace.cleanup()
@@ -82,13 +91,11 @@ class ModelComposer:
82
91
  options = model_types.BaseModelSaveOption()
83
92
 
84
93
  if not snowpark_utils.is_in_stored_procedure(): # type: ignore[no-untyped-call]
85
- snowml_server_availability = env_utils.validate_requirements_in_snowflake_conda_channel(
86
- session=self.session,
87
- reqs=[requirements.Requirement(f"snowflake-ml-python=={snowml_env.VERSION}")],
88
- python_version=snowml_env.PYTHON_VERSION,
94
+ snowml_matched_versions = env_utils.get_matched_package_versions_in_snowflake_conda_channel(
95
+ req=requirements.Requirement(f"snowflake-ml-python=={snowml_env.VERSION}")
89
96
  )
90
97
 
91
- if snowml_server_availability is None and options.get("embed_local_ml_library", False) is False:
98
+ if len(snowml_matched_versions) < 1 and options.get("embed_local_ml_library", False) is False:
92
99
  logging.info(
93
100
  f"Local snowflake-ml-python library has version {snowml_env.VERSION},"
94
101
  " which is not available in the Snowflake server, embedding local ML library automatically."
@@ -111,6 +118,13 @@ class ModelComposer:
111
118
 
112
119
  assert self.packager.meta is not None
113
120
 
121
+ if not options.get("_legacy_save", False):
122
+ # Keep both loose files and zipped file.
123
+ # TODO(SNOW-726678): Remove once import a directory is possible.
124
+ file_utils.copytree(
125
+ str(self._packager_workspace_path), str(self.workspace_path / ModelComposer.MODEL_DIR_REL_PATH)
126
+ )
127
+
114
128
  file_utils.make_archive(self.model_local_path, str(self._packager_workspace_path))
115
129
 
116
130
  self.manifest.save(
@@ -120,7 +134,12 @@ class ModelComposer:
120
134
  options=options,
121
135
  )
122
136
 
123
- file_utils.upload_directory_to_stage(self.session, local_path=self.workspace_path, stage_path=self.stage_path)
137
+ file_utils.upload_directory_to_stage(
138
+ self.session,
139
+ local_path=self.workspace_path,
140
+ stage_path=self.stage_path,
141
+ statement_params=self._statement_params,
142
+ )
124
143
 
125
144
  def load(
126
145
  self,
@@ -129,7 +148,10 @@ class ModelComposer:
129
148
  options: Optional[model_types.ModelLoadOption] = None,
130
149
  ) -> None:
131
150
  file_utils.download_directory_from_stage(
132
- self.session, stage_path=self.stage_path, local_path=self.workspace_path
151
+ self.session,
152
+ stage_path=self.stage_path,
153
+ local_path=self.workspace_path,
154
+ statement_params=self._statement_params,
133
155
  )
134
156
 
135
157
  # TODO (Server-side Model Rollout): Remove this section.
@@ -1,5 +1,6 @@
1
+ import collections
1
2
  import pathlib
2
- from typing import List, Optional
3
+ from typing import List, Optional, cast
3
4
 
4
5
  import yaml
5
6
 
@@ -48,7 +49,6 @@ class ModelManifest:
48
49
  ]
49
50
  self.function_generator = function_generator.FunctionGenerator(model_file_rel_path=model_file_rel_path)
50
51
  self.methods: List[model_method.ModelMethod] = []
51
- _seen_method_names: List[str] = []
52
52
  for target_method in model_meta.signatures.keys():
53
53
  method = model_method.ModelMethod(
54
54
  model_meta=model_meta,
@@ -57,17 +57,18 @@ class ModelManifest:
57
57
  function_generator=self.function_generator,
58
58
  options=model_method.get_model_method_options_from_options(options, target_method),
59
59
  )
60
- if method.method_name in _seen_method_names:
61
- raise ValueError(
62
- f"Found duplicate method named resolved as {method.method_name} in the model. "
63
- "This might because you have methods with same letters but different cases. "
64
- "In this case, set case_sensitive as True for those methods to distinguish them"
65
- )
66
- else:
67
- _seen_method_names.append(method.method_name)
68
60
 
69
61
  self.methods.append(method)
70
62
 
63
+ method_name_counter = collections.Counter([method.method_name for method in self.methods])
64
+ dup_method_names = [k for k, v in method_name_counter.items() if v > 1]
65
+ if dup_method_names:
66
+ raise ValueError(
67
+ f"Found duplicate method named resolved as {', '.join(dup_method_names)} in the model. "
68
+ "This might because you have methods with same letters but different cases. "
69
+ "In this case, set case_sensitive as True for those methods to distinguish them."
70
+ )
71
+
71
72
  manifest_dict = model_manifest_schema.ModelManifestDict(
72
73
  manifest_version=model_manifest_schema.MODEL_MANIFEST_VERSION,
73
74
  runtimes={runtime.name: runtime.save(self.workspace_path) for runtime in self.runtimes},
@@ -84,3 +85,17 @@ class ModelManifest:
84
85
 
85
86
  with (self.workspace_path / ModelManifest.MANIFEST_FILE_REL_PATH).open("w", encoding="utf-8") as f:
86
87
  yaml.safe_dump(manifest_dict, f)
88
+
89
+ def load(self) -> model_manifest_schema.ModelManifestDict:
90
+ with (self.workspace_path / ModelManifest.MANIFEST_FILE_REL_PATH).open("r", encoding="utf-8") as f:
91
+ raw_input = yaml.safe_load(f)
92
+ if not isinstance(raw_input, dict):
93
+ raise ValueError(f"Read ill-formatted model MANIFEST, should be a dict, received {type(raw_input)}")
94
+
95
+ original_loaded_manifest_version = raw_input.get("manifest_version", None)
96
+ if not original_loaded_manifest_version:
97
+ raise ValueError("Unable to get the version of the MANIFEST file.")
98
+
99
+ res = cast(model_manifest_schema.ModelManifestDict, raw_input)
100
+
101
+ return res
@@ -1,6 +1,6 @@
1
1
  # This files contains schema definition of what will be written into MANIFEST.yml
2
2
 
3
- from typing import Dict, List, Literal, TypedDict
3
+ from typing import Any, Dict, List, Literal, TypedDict
4
4
 
5
5
  from typing_extensions import NotRequired, Required
6
6
 
@@ -42,4 +42,4 @@ class ModelManifestDict(TypedDict):
42
42
  manifest_version: Required[str]
43
43
  runtimes: Required[Dict[str, ModelRuntimeDict]]
44
44
  methods: Required[List[ModelMethodDict]]
45
- user_data: NotRequired[Dict[str, str]]
45
+ user_data: NotRequired[Dict[str, Any]]
@@ -73,6 +73,7 @@ dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
73
73
  # Actual function
74
74
  @vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE)
75
75
  def {function_name}(df: pd.DataFrame) -> dict:
76
- input_df = pd.json_normalize(df[0]).astype(dtype=dtype_map)
76
+ df.columns = input_cols
77
+ input_df = df.astype(dtype=dtype_map)
77
78
  predictions_df = runner(input_df[input_cols])
78
79
  return predictions_df.to_dict("records")
@@ -1,13 +1,15 @@
1
+ import collections
1
2
  import pathlib
2
3
  from typing import Optional, TypedDict
3
4
 
4
5
  from typing_extensions import NotRequired
5
6
 
6
7
  from snowflake.ml._internal.utils import sql_identifier
7
- from snowflake.ml.model import type_hints
8
+ from snowflake.ml.model import model_signature, type_hints
8
9
  from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
9
10
  from snowflake.ml.model._model_composer.model_method import function_generator
10
11
  from snowflake.ml.model._packager.model_meta import model_meta as model_meta_api
12
+ from snowflake.snowpark._internal import type_utils
11
13
 
12
14
 
13
15
  class ModelMethodOptions(TypedDict):
@@ -69,6 +71,22 @@ class ModelMethod:
69
71
  if self.target_method not in self.model_meta.signatures.keys():
70
72
  raise ValueError(f"Target method {self.target_method} is not available in the signatures of the model.")
71
73
 
74
+ @staticmethod
75
+ def _get_method_arg_from_feature(
76
+ feature: model_signature.BaseFeatureSpec, case_sensitive: bool = False
77
+ ) -> model_manifest_schema.ModelMethodSignatureFieldWithName:
78
+ assert isinstance(feature, model_signature.FeatureSpec), "FeatureGroupSpec is not supported."
79
+ try:
80
+ feature_name = sql_identifier.SqlIdentifier(feature.name, case_sensitive=case_sensitive)
81
+ except ValueError as e:
82
+ raise ValueError(
83
+ f"Your feature {feature.name} cannot be resolved as valid SQL identifier. "
84
+ "Try specify `case_sensitive` as True."
85
+ ) from e
86
+ return model_manifest_schema.ModelMethodSignatureFieldWithName(
87
+ name=feature_name.resolved(), type=type_utils.convert_sp_to_sf_type(feature.as_snowpark_type())
88
+ )
89
+
72
90
  def save(
73
91
  self, workspace_path: pathlib.Path, options: Optional[function_generator.FunctionGenerateOptions] = None
74
92
  ) -> model_manifest_schema.ModelMethodDict:
@@ -78,13 +96,26 @@ class ModelMethod:
78
96
  self.target_method,
79
97
  options=options,
80
98
  )
99
+ input_list = [
100
+ ModelMethod._get_method_arg_from_feature(ft, case_sensitive=self.options.get("case_sensitive", False))
101
+ for ft in self.model_meta.signatures[self.target_method].inputs
102
+ ]
103
+ input_name_counter = collections.Counter([input_info["name"] for input_info in input_list])
104
+ dup_input_names = [k for k, v in input_name_counter.items() if v > 1]
105
+ if dup_input_names:
106
+ raise ValueError(
107
+ f"Found duplicate input feature named resolved as {', '.join(dup_input_names)} in the method"
108
+ f" {self.target_method} This might because you have methods with same letters but different cases. "
109
+ "In this case, set case_sensitive as True for those methods to distinguish them."
110
+ )
111
+
81
112
  return model_manifest_schema.ModelFunctionMethodDict(
82
- name=self.method_name.identifier(),
113
+ name=self.method_name.resolved(),
83
114
  runtime=self.runtime_name,
84
115
  type="FUNCTION",
85
116
  handler=".".join(
86
117
  [ModelMethod.FUNCTIONS_DIR_REL_PATH, self.target_method, self.function_generator.FUNCTION_NAME]
87
118
  ),
88
- inputs=[model_manifest_schema.ModelMethodSignatureFieldWithName(name="tmp_input", type="OBJECT")],
119
+ inputs=input_list,
89
120
  outputs=[model_manifest_schema.ModelMethodSignatureField(type="OBJECT")],
90
121
  )
@@ -44,7 +44,7 @@ class ModelRuntime:
44
44
  if self.runtime_env._snowpark_ml_version.local:
45
45
  self.embed_local_ml_library = True
46
46
  else:
47
- snowml_server_availability = env_utils.validate_requirements_in_snowflake_conda_channel(
47
+ snowml_server_availability = env_utils.validate_requirements_in_information_schema(
48
48
  session=session,
49
49
  reqs=[requirements.Requirement(snowml_pkg_spec)],
50
50
  python_version=snowml_env.PYTHON_VERSION,
@@ -59,7 +59,7 @@ def get_requirements_from_task(task: str, spcs_only: bool = False) -> List[model
59
59
  return (
60
60
  [model_env.ModelDependency(requirement="tokenizers>=0.13.3", pip_name="tokenizers")]
61
61
  if spcs_only
62
- else [model_env.ModelDependency(requirement="tokenizers", pip_name="tokenizers")]
62
+ else [model_env.ModelDependency(requirement="tokenizers<=0.13.2", pip_name="tokenizers")]
63
63
  )
64
64
 
65
65
  return []
@@ -170,6 +170,7 @@ class HuggingFacePipelineHandler(
170
170
  " `snowflake.ml.model.models.huggingface_pipeline.HuggingFacePipelineModel` object. "
171
171
  "Please make sure you are providing correct model signatures.",
172
172
  UserWarning,
173
+ stacklevel=2,
173
174
  )
174
175
  else:
175
176
  handlers_utils.validate_target_methods(model, target_methods)
@@ -179,6 +180,7 @@ class HuggingFacePipelineHandler(
179
180
  + "Model signature will automatically be inferred from pipeline task. "
180
181
  + "Or, you could specify model signature manually.",
181
182
  UserWarning,
183
+ stacklevel=2,
182
184
  )
183
185
  if inferred_pipe_sig is None:
184
186
  raise NotImplementedError(f"Cannot auto infer the signature of pipeline for task {task}")
@@ -1,4 +1,5 @@
1
1
  import os
2
+ import warnings
2
3
  from typing import TYPE_CHECKING, Callable, Dict, Optional, Type, cast, final
3
4
 
4
5
  import cloudpickle
@@ -9,7 +10,7 @@ from typing_extensions import TypeGuard, Unpack
9
10
  from snowflake.ml._internal import type_utils
10
11
  from snowflake.ml.model import custom_model, model_signature, type_hints as model_types
11
12
  from snowflake.ml.model._packager.model_env import model_env
12
- from snowflake.ml.model._packager.model_handlers import _base, _utils as handlers_utils
13
+ from snowflake.ml.model._packager.model_handlers import _base
13
14
  from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
14
15
  from snowflake.ml.model._packager.model_meta import (
15
16
  model_blob_meta,
@@ -78,34 +79,15 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
78
79
  # Pipeline is inherited from BaseEstimator, so no need to add one more check
79
80
 
80
81
  if not is_sub_model:
81
- if (not model_meta.signatures) and sample_input is None:
82
- assert hasattr(model, "model_signatures")
83
- model_meta.signatures = getattr(model, "model_signatures", {})
84
- else:
85
- target_methods = handlers_utils.get_target_methods(
86
- model=model,
87
- target_methods=kwargs.pop("target_methods", None),
88
- default_target_methods=cls.DEFAULT_TARGET_METHODS,
89
- )
90
-
91
- def get_prediction(
92
- target_method_name: str, sample_input: model_types.SupportedLocalDataType
93
- ) -> model_types.SupportedLocalDataType:
94
- if not isinstance(sample_input, (pd.DataFrame,)):
95
- sample_input = model_signature._convert_local_data_to_df(sample_input)
96
-
97
- target_method = getattr(model, target_method_name, None)
98
- assert callable(target_method)
99
- predictions_df = target_method(sample_input)
100
- return predictions_df
101
-
102
- model_meta = handlers_utils.validate_signature(
103
- model=model,
104
- model_meta=model_meta,
105
- target_methods=target_methods,
106
- sample_input=sample_input,
107
- get_prediction_fn=get_prediction,
82
+ if sample_input is not None or model_meta.signatures:
83
+ warnings.warn(
84
+ "Inferring model signature from sample input or providing model signature for Snowpark ML "
85
+ + "Modeling model is not required. Model signature will automatically be inferred during fitting. ",
86
+ UserWarning,
87
+ stacklevel=2,
108
88
  )
89
+ assert hasattr(model, "model_signatures"), "Model does not have model signatures as expected."
90
+ model_meta.signatures = getattr(model, "model_signatures", {})
109
91
 
110
92
  model_blob_path = os.path.join(model_blobs_dir_path, name)
111
93
  os.makedirs(model_blob_path, exist_ok=True)
@@ -72,20 +72,22 @@ def create_model_metadata(
72
72
  """
73
73
  model_dir_path = os.path.normpath(model_dir_path)
74
74
  embed_local_ml_library = kwargs.pop("embed_local_ml_library", False)
75
- # Use the last one which is loaded first, that is mean, it is loaded from site-packages.
76
- # We could make sure that user does not overwrite our library with their code follow the same naming.
77
- snowml_path, snowml_start_path = file_utils.get_package_path(_SNOWFLAKE_ML_PKG_NAME, strategy="last")
78
- if os.path.isdir(snowml_start_path):
79
- path_to_copy = snowml_path
80
- # If the package is zip-imported, then the path will be `../path_to_zip.zip/snowflake/ml`
81
- # It is not a valid path in fact and we need to get the path to the zip file to verify it.
82
- elif os.path.isfile(snowml_start_path):
83
- extract_root = tempfile.mkdtemp()
84
- with zipfile.ZipFile(os.path.abspath(snowml_start_path), mode="r", compression=zipfile.ZIP_DEFLATED) as zf:
85
- zf.extractall(path=extract_root)
86
- path_to_copy = os.path.join(extract_root, *(_SNOWFLAKE_ML_PKG_NAME.split(".")))
87
- else:
88
- raise ValueError("`snowflake.ml` is imported via a way that embedding local ML library is not supported.")
75
+ legacy_save = kwargs.pop("_legacy_save", False)
76
+ if embed_local_ml_library:
77
+ # Use the last one which is loaded first, that is mean, it is loaded from site-packages.
78
+ # We could make sure that user does not overwrite our library with their code follow the same naming.
79
+ snowml_path, snowml_start_path = file_utils.get_package_path(_SNOWFLAKE_ML_PKG_NAME, strategy="last")
80
+ if os.path.isdir(snowml_start_path):
81
+ path_to_copy = snowml_path
82
+ # If the package is zip-imported, then the path will be `../path_to_zip.zip/snowflake/ml`
83
+ # It is not a valid path in fact and we need to get the path to the zip file to verify it.
84
+ elif os.path.isfile(snowml_start_path):
85
+ extract_root = tempfile.mkdtemp()
86
+ with zipfile.ZipFile(os.path.abspath(snowml_start_path), mode="r", compression=zipfile.ZIP_DEFLATED) as zf:
87
+ zf.extractall(path=extract_root)
88
+ path_to_copy = os.path.join(extract_root, *(_SNOWFLAKE_ML_PKG_NAME.split(".")))
89
+ else:
90
+ raise ValueError("`snowflake.ml` is imported via a way that embedding local ML library is not supported.")
89
91
 
90
92
  env = _create_env_for_model_metadata(
91
93
  conda_dependencies=conda_dependencies,
@@ -106,10 +108,10 @@ def create_model_metadata(
106
108
  )
107
109
 
108
110
  code_dir_path = os.path.join(model_dir_path, MODEL_CODE_DIR)
109
- if embed_local_ml_library or code_paths:
111
+ if (embed_local_ml_library and legacy_save) or code_paths:
110
112
  os.makedirs(code_dir_path, exist_ok=True)
111
113
 
112
- if embed_local_ml_library:
114
+ if embed_local_ml_library and legacy_save:
113
115
  snowml_path_in_code = os.path.join(code_dir_path, _SNOWFLAKE_PKG_NAME)
114
116
  os.makedirs(snowml_path_in_code, exist_ok=True)
115
117
  file_utils.copy_file_or_tree(path_to_copy, snowml_path_in_code)
@@ -51,7 +51,7 @@ class SnowparkDataFrameHandler(base_handler.BaseDataHandler[snowflake.snowpark.D
51
51
  data: snowflake.snowpark.DataFrame, role: Literal["input", "output"]
52
52
  ) -> Sequence[core.BaseFeatureSpec]:
53
53
  return pandas_handler.PandasDataFrameHandler.infer_signature(
54
- SnowparkDataFrameHandler.convert_to_df(data), role=role
54
+ SnowparkDataFrameHandler.convert_to_df(data.limit(n=1)), role=role
55
55
  )
56
56
 
57
57
  @staticmethod