snowflake-ml-python 1.1.1__py3-none-any.whl → 1.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +1 -1
- snowflake/cortex/_extract_answer.py +1 -1
- snowflake/cortex/_sentiment.py +1 -1
- snowflake/cortex/_summarize.py +1 -1
- snowflake/cortex/_translate.py +1 -1
- snowflake/ml/_internal/env_utils.py +68 -6
- snowflake/ml/_internal/file_utils.py +34 -4
- snowflake/ml/_internal/telemetry.py +79 -91
- snowflake/ml/_internal/utils/retryable_http.py +16 -4
- snowflake/ml/_internal/utils/spcs_attribution_utils.py +122 -0
- snowflake/ml/dataset/dataset.py +1 -1
- snowflake/ml/model/_api.py +21 -14
- snowflake/ml/model/_client/model/model_impl.py +176 -0
- snowflake/ml/model/_client/model/model_method_info.py +19 -0
- snowflake/ml/model/_client/model/model_version_impl.py +291 -0
- snowflake/ml/model/_client/ops/metadata_ops.py +107 -0
- snowflake/ml/model/_client/ops/model_ops.py +308 -0
- snowflake/ml/model/_client/sql/model.py +75 -0
- snowflake/ml/model/_client/sql/model_version.py +213 -0
- snowflake/ml/model/_client/sql/stage.py +40 -0
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -4
- snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +24 -8
- snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +23 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +14 -2
- snowflake/ml/model/_deploy_client/utils/constants.py +1 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +2 -2
- snowflake/ml/model/_model_composer/model_composer.py +31 -9
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +25 -10
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -2
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +34 -3
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +1 -1
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +3 -1
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +10 -28
- snowflake/ml/model/_packager/model_meta/model_meta.py +18 -16
- snowflake/ml/model/_signatures/snowpark_handler.py +1 -1
- snowflake/ml/model/model_signature.py +108 -53
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +554 -0
- snowflake/ml/modeling/_internal/estimator_protocols.py +1 -60
- snowflake/ml/modeling/_internal/model_specifications.py +146 -0
- snowflake/ml/modeling/_internal/model_trainer.py +13 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +78 -0
- snowflake/ml/modeling/_internal/pandas_trainer.py +54 -0
- snowflake/ml/modeling/_internal/snowpark_handlers.py +6 -760
- snowflake/ml/modeling/_internal/snowpark_trainer.py +331 -0
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +96 -124
- snowflake/ml/modeling/cluster/affinity_propagation.py +94 -124
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +94 -124
- snowflake/ml/modeling/cluster/birch.py +94 -124
- snowflake/ml/modeling/cluster/bisecting_k_means.py +94 -124
- snowflake/ml/modeling/cluster/dbscan.py +94 -124
- snowflake/ml/modeling/cluster/feature_agglomeration.py +94 -124
- snowflake/ml/modeling/cluster/k_means.py +93 -124
- snowflake/ml/modeling/cluster/mean_shift.py +94 -124
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +93 -124
- snowflake/ml/modeling/cluster/optics.py +94 -124
- snowflake/ml/modeling/cluster/spectral_biclustering.py +94 -124
- snowflake/ml/modeling/cluster/spectral_clustering.py +94 -124
- snowflake/ml/modeling/cluster/spectral_coclustering.py +94 -124
- snowflake/ml/modeling/compose/column_transformer.py +94 -124
- snowflake/ml/modeling/compose/transformed_target_regressor.py +96 -124
- snowflake/ml/modeling/covariance/elliptic_envelope.py +94 -124
- snowflake/ml/modeling/covariance/empirical_covariance.py +80 -110
- snowflake/ml/modeling/covariance/graphical_lasso.py +94 -124
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +94 -124
- snowflake/ml/modeling/covariance/ledoit_wolf.py +85 -115
- snowflake/ml/modeling/covariance/min_cov_det.py +94 -124
- snowflake/ml/modeling/covariance/oas.py +80 -110
- snowflake/ml/modeling/covariance/shrunk_covariance.py +84 -114
- snowflake/ml/modeling/decomposition/dictionary_learning.py +94 -124
- snowflake/ml/modeling/decomposition/factor_analysis.py +94 -124
- snowflake/ml/modeling/decomposition/fast_ica.py +94 -124
- snowflake/ml/modeling/decomposition/incremental_pca.py +94 -124
- snowflake/ml/modeling/decomposition/kernel_pca.py +94 -124
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +94 -124
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +94 -124
- snowflake/ml/modeling/decomposition/pca.py +94 -124
- snowflake/ml/modeling/decomposition/sparse_pca.py +94 -124
- snowflake/ml/modeling/decomposition/truncated_svd.py +94 -124
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +96 -124
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +91 -119
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/bagging_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/bagging_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/isolation_forest.py +94 -124
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/stacking_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/voting_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/voting_regressor.py +91 -119
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +82 -110
- snowflake/ml/modeling/feature_selection/select_fdr.py +80 -108
- snowflake/ml/modeling/feature_selection/select_fpr.py +80 -108
- snowflake/ml/modeling/feature_selection/select_fwe.py +80 -108
- snowflake/ml/modeling/feature_selection/select_k_best.py +81 -109
- snowflake/ml/modeling/feature_selection/select_percentile.py +80 -108
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +94 -124
- snowflake/ml/modeling/feature_selection/variance_threshold.py +76 -106
- snowflake/ml/modeling/framework/base.py +2 -2
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +96 -124
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +96 -124
- snowflake/ml/modeling/impute/iterative_imputer.py +94 -124
- snowflake/ml/modeling/impute/knn_imputer.py +94 -124
- snowflake/ml/modeling/impute/missing_indicator.py +94 -124
- snowflake/ml/modeling/impute/simple_imputer.py +1 -1
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +77 -107
- snowflake/ml/modeling/kernel_approximation/nystroem.py +94 -124
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +94 -124
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +86 -116
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +84 -114
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +96 -124
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +71 -100
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +71 -100
- snowflake/ml/modeling/linear_model/ard_regression.py +96 -124
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +96 -124
- snowflake/ml/modeling/linear_model/elastic_net.py +96 -124
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +96 -124
- snowflake/ml/modeling/linear_model/gamma_regressor.py +96 -124
- snowflake/ml/modeling/linear_model/huber_regressor.py +96 -124
- snowflake/ml/modeling/linear_model/lars.py +96 -124
- snowflake/ml/modeling/linear_model/lars_cv.py +96 -124
- snowflake/ml/modeling/linear_model/lasso.py +96 -124
- snowflake/ml/modeling/linear_model/lasso_cv.py +96 -124
- snowflake/ml/modeling/linear_model/lasso_lars.py +96 -124
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +96 -124
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +96 -124
- snowflake/ml/modeling/linear_model/linear_regression.py +91 -119
- snowflake/ml/modeling/linear_model/logistic_regression.py +96 -124
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +96 -124
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +96 -124
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +96 -124
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +96 -124
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +96 -124
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +96 -124
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +96 -124
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +95 -124
- snowflake/ml/modeling/linear_model/perceptron.py +95 -124
- snowflake/ml/modeling/linear_model/poisson_regressor.py +96 -124
- snowflake/ml/modeling/linear_model/ransac_regressor.py +96 -124
- snowflake/ml/modeling/linear_model/ridge.py +96 -124
- snowflake/ml/modeling/linear_model/ridge_classifier.py +96 -124
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +96 -124
- snowflake/ml/modeling/linear_model/ridge_cv.py +96 -124
- snowflake/ml/modeling/linear_model/sgd_classifier.py +96 -124
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +94 -124
- snowflake/ml/modeling/linear_model/sgd_regressor.py +96 -124
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +96 -124
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +96 -124
- snowflake/ml/modeling/manifold/isomap.py +94 -124
- snowflake/ml/modeling/manifold/mds.py +94 -124
- snowflake/ml/modeling/manifold/spectral_embedding.py +94 -124
- snowflake/ml/modeling/manifold/tsne.py +94 -124
- snowflake/ml/modeling/metrics/classification.py +187 -52
- snowflake/ml/modeling/metrics/correlation.py +4 -2
- snowflake/ml/modeling/metrics/covariance.py +7 -4
- snowflake/ml/modeling/metrics/ranking.py +32 -16
- snowflake/ml/modeling/metrics/regression.py +60 -32
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +94 -124
- snowflake/ml/modeling/mixture/gaussian_mixture.py +94 -124
- snowflake/ml/modeling/model_selection/grid_search_cv.py +88 -138
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +90 -144
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +86 -114
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +93 -121
- snowflake/ml/modeling/multiclass/output_code_classifier.py +94 -122
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +92 -120
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +96 -124
- snowflake/ml/modeling/naive_bayes/complement_nb.py +92 -120
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -107
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +88 -116
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +96 -124
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +96 -124
- snowflake/ml/modeling/neighbors/kernel_density.py +94 -124
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +94 -124
- snowflake/ml/modeling/neighbors/nearest_centroid.py +89 -117
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +94 -124
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +96 -124
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +96 -124
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +96 -124
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +94 -124
- snowflake/ml/modeling/neural_network/mlp_classifier.py +96 -124
- snowflake/ml/modeling/neural_network/mlp_regressor.py +96 -124
- snowflake/ml/modeling/parameters/disable_distributed_hpo.py +2 -6
- snowflake/ml/modeling/preprocessing/binarizer.py +14 -9
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +0 -4
- snowflake/ml/modeling/preprocessing/label_encoder.py +21 -13
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +20 -14
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +35 -19
- snowflake/ml/modeling/preprocessing/normalizer.py +6 -9
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +20 -13
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +25 -13
- snowflake/ml/modeling/preprocessing/polynomial_features.py +94 -124
- snowflake/ml/modeling/preprocessing/robust_scaler.py +28 -14
- snowflake/ml/modeling/preprocessing/standard_scaler.py +25 -13
- snowflake/ml/modeling/semi_supervised/label_propagation.py +96 -124
- snowflake/ml/modeling/semi_supervised/label_spreading.py +96 -124
- snowflake/ml/modeling/svm/linear_svc.py +96 -124
- snowflake/ml/modeling/svm/linear_svr.py +96 -124
- snowflake/ml/modeling/svm/nu_svc.py +96 -124
- snowflake/ml/modeling/svm/nu_svr.py +96 -124
- snowflake/ml/modeling/svm/svc.py +96 -124
- snowflake/ml/modeling/svm/svr.py +96 -124
- snowflake/ml/modeling/tree/decision_tree_classifier.py +96 -124
- snowflake/ml/modeling/tree/decision_tree_regressor.py +96 -124
- snowflake/ml/modeling/tree/extra_tree_classifier.py +96 -124
- snowflake/ml/modeling/tree/extra_tree_regressor.py +96 -124
- snowflake/ml/modeling/xgboost/xgb_classifier.py +96 -125
- snowflake/ml/modeling/xgboost/xgb_regressor.py +96 -125
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +96 -125
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +96 -125
- snowflake/ml/registry/model_registry.py +2 -0
- snowflake/ml/registry/registry.py +215 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.1.1.dist-info → snowflake_ml_python-1.1.2.dist-info}/METADATA +21 -3
- snowflake_ml_python-1.1.2.dist-info/RECORD +347 -0
- snowflake_ml_python-1.1.1.dist-info/RECORD +0 -331
- {snowflake_ml_python-1.1.1.dist-info → snowflake_ml_python-1.1.2.dist-info}/WHEEL +0 -0
@@ -1,22 +1,38 @@
|
|
1
1
|
spec:
|
2
2
|
container:
|
3
|
-
- name: $container_name
|
4
|
-
image: $base_image
|
3
|
+
- name: "${container_name}"
|
4
|
+
image: "${base_image}"
|
5
5
|
command:
|
6
6
|
- sh
|
7
7
|
args:
|
8
8
|
- -c
|
9
|
-
-
|
10
|
-
|
11
|
-
|
12
|
-
|
9
|
+
- |
|
10
|
+
wait_for_file() {
|
11
|
+
file_path="$1"
|
12
|
+
timeout="$2"
|
13
|
+
elapsed_time=0
|
14
|
+
while [ ! -f "${file_path}" ]; do
|
15
|
+
if [ "${elapsed_time}" -ge "${timeout}" ]; then
|
16
|
+
echo "Error: ${file_path} not found within ${timeout} seconds. Exiting."
|
17
|
+
exit 1
|
18
|
+
fi
|
19
|
+
elapsed_time=$((elapsed_time + 1))
|
20
|
+
remaining_time=$((timeout - elapsed_time))
|
21
|
+
echo "Awaiting the mounting of ${file_path}. Wait time remaining: ${remaining_time} seconds"
|
22
|
+
sleep 1
|
23
|
+
done
|
24
|
+
}
|
25
|
+
wait_for_file "${script_path}" 300
|
26
|
+
wait_for_file "${mounted_token_path}" 300
|
27
|
+
chmod +x "${script_path}"
|
28
|
+
sh "${script_path}"
|
13
29
|
volumeMounts:
|
14
30
|
- name: vol1
|
15
31
|
mountPath: /local/user/vol1
|
16
32
|
- name: stagemount
|
17
|
-
mountPath: /$stage
|
33
|
+
mountPath: "/${stage}"
|
18
34
|
volume:
|
19
35
|
- name: vol1
|
20
36
|
source: local # only local emptyDir volume is supported
|
21
37
|
- name: stagemount
|
22
|
-
source: "@$stage"
|
38
|
+
source: "@${stage}"
|
@@ -11,18 +11,41 @@ cleanup() {
|
|
11
11
|
kill -- -$$$ # Kill the entire process group. Extra $ to escape, the generated shell script should have two $.
|
12
12
|
}
|
13
13
|
|
14
|
+
# SNOW-990976, This is an additional safety check to ensure token file exists, on top of the token file check upon
|
15
|
+
# launching SPCS job. This additional check could provide value in cases things go wrong with token refresh that result
|
16
|
+
# in token file to disappear.
|
17
|
+
wait_till_token_file_exists() {
|
18
|
+
timeout=60 # 1 minute timeout
|
19
|
+
elapsed_time=0
|
20
|
+
|
21
|
+
while [ ! -f "${SESSION_TOKEN_PATH}" ] && [ "$elapsed_time" -lt "$timeout" ]; do
|
22
|
+
sleep 1
|
23
|
+
elapsed_time=$((elapsed_time + 1))
|
24
|
+
remaining_time=$((timeout - elapsed_time))
|
25
|
+
echo "Waiting for token file to exist. Wait time remaining: ${remaining_time} seconds."
|
26
|
+
done
|
27
|
+
|
28
|
+
if [ ! -f "${SESSION_TOKEN_PATH}" ]; then
|
29
|
+
echo "Error: Token file '${SESSION_TOKEN_PATH}' does not show up within the ${timeout} seconds timeout period."
|
30
|
+
exit 1
|
31
|
+
fi
|
32
|
+
}
|
33
|
+
|
14
34
|
generate_registry_cred() {
|
35
|
+
wait_till_token_file_exists
|
15
36
|
AUTH_TOKEN=$(printf '0auth2accesstoken:%s' "$(cat ${SESSION_TOKEN_PATH})" | base64);
|
16
37
|
echo '{"auths":{"$image_repo":{"auth":"'"$AUTH_TOKEN"'"}}}' | tr -d '\n' > $REGISTRY_CRED_PATH;
|
17
38
|
}
|
18
39
|
|
19
40
|
on_session_token_change() {
|
41
|
+
wait_till_token_file_exists
|
20
42
|
# Get the initial checksum of the file
|
21
43
|
CHECKSUM=$(md5sum "${SESSION_TOKEN_PATH}" | awk '{ print $1 }')
|
22
44
|
# Run the command once before the loop
|
23
45
|
echo "Monitoring session token changes in the background..."
|
24
46
|
(
|
25
47
|
while true; do
|
48
|
+
wait_till_token_file_exists
|
26
49
|
# Get the current checksum of the file
|
27
50
|
CURRENT_CHECKSUM=$(md5sum "${SESSION_TOKEN_PATH}" | awk '{ print $1 }')
|
28
51
|
if [ "${CURRENT_CHECKSUM}" != "${CHECKSUM}" ]; then
|
@@ -10,14 +10,19 @@ from typing import Any, Dict, Generator, Optional, cast
|
|
10
10
|
|
11
11
|
import importlib_resources
|
12
12
|
import yaml
|
13
|
+
from packaging import requirements
|
13
14
|
from typing_extensions import Unpack
|
14
15
|
|
15
|
-
from snowflake.ml._internal import file_utils
|
16
|
+
from snowflake.ml._internal import env_utils, file_utils
|
16
17
|
from snowflake.ml._internal.exceptions import (
|
17
18
|
error_codes,
|
18
19
|
exceptions as snowml_exceptions,
|
19
20
|
)
|
20
|
-
from snowflake.ml._internal.utils import
|
21
|
+
from snowflake.ml._internal.utils import (
|
22
|
+
identifier,
|
23
|
+
query_result_checker,
|
24
|
+
spcs_attribution_utils,
|
25
|
+
)
|
21
26
|
from snowflake.ml.model import type_hints
|
22
27
|
from snowflake.ml.model._deploy_client import snowservice
|
23
28
|
from snowflake.ml.model._deploy_client.image_builds import (
|
@@ -161,6 +166,11 @@ def _deploy(
|
|
161
166
|
# Set conda-forge as backup channel for SPCS deployment
|
162
167
|
if "conda-forge" not in model_meta_deploy.env._conda_dependencies:
|
163
168
|
model_meta_deploy.env._conda_dependencies["conda-forge"] = []
|
169
|
+
# Snowflake connector needs pyarrow to work correctly.
|
170
|
+
env_utils.append_conda_dependency(
|
171
|
+
model_meta_deploy.env._conda_dependencies,
|
172
|
+
(env_utils.DEFAULT_CHANNEL_NAME, requirements.Requirement("pyarrow")),
|
173
|
+
)
|
164
174
|
if options.use_gpu:
|
165
175
|
# Make mypy happy
|
166
176
|
assert options.num_gpus is not None
|
@@ -585,6 +595,8 @@ class SnowServiceDeployment:
|
|
585
595
|
)
|
586
596
|
logger.info(f"Service {self._service_name} is ready. Creating service function...")
|
587
597
|
|
598
|
+
spcs_attribution_utils.record_service_start(self.session, self._service_name)
|
599
|
+
|
588
600
|
service_function_sql = client.create_or_replace_service_function(
|
589
601
|
service_func_name=self.service_func_name,
|
590
602
|
service_name=self._service_name,
|
@@ -173,7 +173,7 @@ def _get_model_final_packages(
|
|
173
173
|
else:
|
174
174
|
required_packages = meta.env._conda_dependencies[env_utils.DEFAULT_CHANNEL_NAME]
|
175
175
|
|
176
|
-
final_packages = env_utils.
|
176
|
+
final_packages = env_utils.validate_requirements_in_information_schema(
|
177
177
|
session, required_packages, python_version=meta.env.python_version
|
178
178
|
)
|
179
179
|
|
@@ -182,7 +182,7 @@ def _get_model_final_packages(
|
|
182
182
|
raise snowml_exceptions.SnowflakeMLException(
|
183
183
|
error_code=error_codes.DEPENDENCY_VERSION_ERROR,
|
184
184
|
original_exception=RuntimeError(
|
185
|
-
"The model's
|
185
|
+
"The model's dependencies are not available in Snowflake Anaconda Channel. "
|
186
186
|
+ relax_version_info_str
|
187
187
|
+ "Required packages are:\n"
|
188
188
|
+ " ".join(map(lambda x: f'"{x}"', required_packages))
|
@@ -3,7 +3,7 @@ import pathlib
|
|
3
3
|
import tempfile
|
4
4
|
import zipfile
|
5
5
|
from types import ModuleType
|
6
|
-
from typing import Dict, List, Optional
|
6
|
+
from typing import Any, Dict, List, Optional
|
7
7
|
|
8
8
|
from absl import logging
|
9
9
|
from packaging import requirements
|
@@ -32,8 +32,15 @@ class ModelComposer:
|
|
32
32
|
"""
|
33
33
|
|
34
34
|
MODEL_FILE_REL_PATH = "model.zip"
|
35
|
+
MODEL_DIR_REL_PATH = "model"
|
35
36
|
|
36
|
-
def __init__(
|
37
|
+
def __init__(
|
38
|
+
self,
|
39
|
+
session: Session,
|
40
|
+
stage_path: str,
|
41
|
+
*,
|
42
|
+
statement_params: Optional[Dict[str, Any]] = None,
|
43
|
+
) -> None:
|
37
44
|
self.session = session
|
38
45
|
self.stage_path = pathlib.PurePosixPath(stage_path)
|
39
46
|
|
@@ -43,6 +50,8 @@ class ModelComposer:
|
|
43
50
|
self.packager = model_packager.ModelPackager(local_dir_path=str(self._packager_workspace_path))
|
44
51
|
self.manifest = model_manifest.ModelManifest(workspace_path=self.workspace_path)
|
45
52
|
|
53
|
+
self._statement_params = statement_params
|
54
|
+
|
46
55
|
def __del__(self) -> None:
|
47
56
|
self._workspace.cleanup()
|
48
57
|
self._packager_workspace.cleanup()
|
@@ -82,13 +91,11 @@ class ModelComposer:
|
|
82
91
|
options = model_types.BaseModelSaveOption()
|
83
92
|
|
84
93
|
if not snowpark_utils.is_in_stored_procedure(): # type: ignore[no-untyped-call]
|
85
|
-
|
86
|
-
|
87
|
-
reqs=[requirements.Requirement(f"snowflake-ml-python=={snowml_env.VERSION}")],
|
88
|
-
python_version=snowml_env.PYTHON_VERSION,
|
94
|
+
snowml_matched_versions = env_utils.get_matched_package_versions_in_snowflake_conda_channel(
|
95
|
+
req=requirements.Requirement(f"snowflake-ml-python=={snowml_env.VERSION}")
|
89
96
|
)
|
90
97
|
|
91
|
-
if
|
98
|
+
if len(snowml_matched_versions) < 1 and options.get("embed_local_ml_library", False) is False:
|
92
99
|
logging.info(
|
93
100
|
f"Local snowflake-ml-python library has version {snowml_env.VERSION},"
|
94
101
|
" which is not available in the Snowflake server, embedding local ML library automatically."
|
@@ -111,6 +118,13 @@ class ModelComposer:
|
|
111
118
|
|
112
119
|
assert self.packager.meta is not None
|
113
120
|
|
121
|
+
if not options.get("_legacy_save", False):
|
122
|
+
# Keep both loose files and zipped file.
|
123
|
+
# TODO(SNOW-726678): Remove once import a directory is possible.
|
124
|
+
file_utils.copytree(
|
125
|
+
str(self._packager_workspace_path), str(self.workspace_path / ModelComposer.MODEL_DIR_REL_PATH)
|
126
|
+
)
|
127
|
+
|
114
128
|
file_utils.make_archive(self.model_local_path, str(self._packager_workspace_path))
|
115
129
|
|
116
130
|
self.manifest.save(
|
@@ -120,7 +134,12 @@ class ModelComposer:
|
|
120
134
|
options=options,
|
121
135
|
)
|
122
136
|
|
123
|
-
file_utils.upload_directory_to_stage(
|
137
|
+
file_utils.upload_directory_to_stage(
|
138
|
+
self.session,
|
139
|
+
local_path=self.workspace_path,
|
140
|
+
stage_path=self.stage_path,
|
141
|
+
statement_params=self._statement_params,
|
142
|
+
)
|
124
143
|
|
125
144
|
def load(
|
126
145
|
self,
|
@@ -129,7 +148,10 @@ class ModelComposer:
|
|
129
148
|
options: Optional[model_types.ModelLoadOption] = None,
|
130
149
|
) -> None:
|
131
150
|
file_utils.download_directory_from_stage(
|
132
|
-
self.session,
|
151
|
+
self.session,
|
152
|
+
stage_path=self.stage_path,
|
153
|
+
local_path=self.workspace_path,
|
154
|
+
statement_params=self._statement_params,
|
133
155
|
)
|
134
156
|
|
135
157
|
# TODO (Server-side Model Rollout): Remove this section.
|
@@ -1,5 +1,6 @@
|
|
1
|
+
import collections
|
1
2
|
import pathlib
|
2
|
-
from typing import List, Optional
|
3
|
+
from typing import List, Optional, cast
|
3
4
|
|
4
5
|
import yaml
|
5
6
|
|
@@ -48,7 +49,6 @@ class ModelManifest:
|
|
48
49
|
]
|
49
50
|
self.function_generator = function_generator.FunctionGenerator(model_file_rel_path=model_file_rel_path)
|
50
51
|
self.methods: List[model_method.ModelMethod] = []
|
51
|
-
_seen_method_names: List[str] = []
|
52
52
|
for target_method in model_meta.signatures.keys():
|
53
53
|
method = model_method.ModelMethod(
|
54
54
|
model_meta=model_meta,
|
@@ -57,17 +57,18 @@ class ModelManifest:
|
|
57
57
|
function_generator=self.function_generator,
|
58
58
|
options=model_method.get_model_method_options_from_options(options, target_method),
|
59
59
|
)
|
60
|
-
if method.method_name in _seen_method_names:
|
61
|
-
raise ValueError(
|
62
|
-
f"Found duplicate method named resolved as {method.method_name} in the model. "
|
63
|
-
"This might because you have methods with same letters but different cases. "
|
64
|
-
"In this case, set case_sensitive as True for those methods to distinguish them"
|
65
|
-
)
|
66
|
-
else:
|
67
|
-
_seen_method_names.append(method.method_name)
|
68
60
|
|
69
61
|
self.methods.append(method)
|
70
62
|
|
63
|
+
method_name_counter = collections.Counter([method.method_name for method in self.methods])
|
64
|
+
dup_method_names = [k for k, v in method_name_counter.items() if v > 1]
|
65
|
+
if dup_method_names:
|
66
|
+
raise ValueError(
|
67
|
+
f"Found duplicate method named resolved as {', '.join(dup_method_names)} in the model. "
|
68
|
+
"This might because you have methods with same letters but different cases. "
|
69
|
+
"In this case, set case_sensitive as True for those methods to distinguish them."
|
70
|
+
)
|
71
|
+
|
71
72
|
manifest_dict = model_manifest_schema.ModelManifestDict(
|
72
73
|
manifest_version=model_manifest_schema.MODEL_MANIFEST_VERSION,
|
73
74
|
runtimes={runtime.name: runtime.save(self.workspace_path) for runtime in self.runtimes},
|
@@ -84,3 +85,17 @@ class ModelManifest:
|
|
84
85
|
|
85
86
|
with (self.workspace_path / ModelManifest.MANIFEST_FILE_REL_PATH).open("w", encoding="utf-8") as f:
|
86
87
|
yaml.safe_dump(manifest_dict, f)
|
88
|
+
|
89
|
+
def load(self) -> model_manifest_schema.ModelManifestDict:
|
90
|
+
with (self.workspace_path / ModelManifest.MANIFEST_FILE_REL_PATH).open("r", encoding="utf-8") as f:
|
91
|
+
raw_input = yaml.safe_load(f)
|
92
|
+
if not isinstance(raw_input, dict):
|
93
|
+
raise ValueError(f"Read ill-formatted model MANIFEST, should be a dict, received {type(raw_input)}")
|
94
|
+
|
95
|
+
original_loaded_manifest_version = raw_input.get("manifest_version", None)
|
96
|
+
if not original_loaded_manifest_version:
|
97
|
+
raise ValueError("Unable to get the version of the MANIFEST file.")
|
98
|
+
|
99
|
+
res = cast(model_manifest_schema.ModelManifestDict, raw_input)
|
100
|
+
|
101
|
+
return res
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# This files contains schema definition of what will be written into MANIFEST.yml
|
2
2
|
|
3
|
-
from typing import Dict, List, Literal, TypedDict
|
3
|
+
from typing import Any, Dict, List, Literal, TypedDict
|
4
4
|
|
5
5
|
from typing_extensions import NotRequired, Required
|
6
6
|
|
@@ -42,4 +42,4 @@ class ModelManifestDict(TypedDict):
|
|
42
42
|
manifest_version: Required[str]
|
43
43
|
runtimes: Required[Dict[str, ModelRuntimeDict]]
|
44
44
|
methods: Required[List[ModelMethodDict]]
|
45
|
-
user_data: NotRequired[Dict[str,
|
45
|
+
user_data: NotRequired[Dict[str, Any]]
|
@@ -73,6 +73,7 @@ dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
|
|
73
73
|
# Actual function
|
74
74
|
@vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE)
|
75
75
|
def {function_name}(df: pd.DataFrame) -> dict:
|
76
|
-
|
76
|
+
df.columns = input_cols
|
77
|
+
input_df = df.astype(dtype=dtype_map)
|
77
78
|
predictions_df = runner(input_df[input_cols])
|
78
79
|
return predictions_df.to_dict("records")
|
@@ -1,13 +1,15 @@
|
|
1
|
+
import collections
|
1
2
|
import pathlib
|
2
3
|
from typing import Optional, TypedDict
|
3
4
|
|
4
5
|
from typing_extensions import NotRequired
|
5
6
|
|
6
7
|
from snowflake.ml._internal.utils import sql_identifier
|
7
|
-
from snowflake.ml.model import type_hints
|
8
|
+
from snowflake.ml.model import model_signature, type_hints
|
8
9
|
from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
|
9
10
|
from snowflake.ml.model._model_composer.model_method import function_generator
|
10
11
|
from snowflake.ml.model._packager.model_meta import model_meta as model_meta_api
|
12
|
+
from snowflake.snowpark._internal import type_utils
|
11
13
|
|
12
14
|
|
13
15
|
class ModelMethodOptions(TypedDict):
|
@@ -69,6 +71,22 @@ class ModelMethod:
|
|
69
71
|
if self.target_method not in self.model_meta.signatures.keys():
|
70
72
|
raise ValueError(f"Target method {self.target_method} is not available in the signatures of the model.")
|
71
73
|
|
74
|
+
@staticmethod
|
75
|
+
def _get_method_arg_from_feature(
|
76
|
+
feature: model_signature.BaseFeatureSpec, case_sensitive: bool = False
|
77
|
+
) -> model_manifest_schema.ModelMethodSignatureFieldWithName:
|
78
|
+
assert isinstance(feature, model_signature.FeatureSpec), "FeatureGroupSpec is not supported."
|
79
|
+
try:
|
80
|
+
feature_name = sql_identifier.SqlIdentifier(feature.name, case_sensitive=case_sensitive)
|
81
|
+
except ValueError as e:
|
82
|
+
raise ValueError(
|
83
|
+
f"Your feature {feature.name} cannot be resolved as valid SQL identifier. "
|
84
|
+
"Try specify `case_sensitive` as True."
|
85
|
+
) from e
|
86
|
+
return model_manifest_schema.ModelMethodSignatureFieldWithName(
|
87
|
+
name=feature_name.resolved(), type=type_utils.convert_sp_to_sf_type(feature.as_snowpark_type())
|
88
|
+
)
|
89
|
+
|
72
90
|
def save(
|
73
91
|
self, workspace_path: pathlib.Path, options: Optional[function_generator.FunctionGenerateOptions] = None
|
74
92
|
) -> model_manifest_schema.ModelMethodDict:
|
@@ -78,13 +96,26 @@ class ModelMethod:
|
|
78
96
|
self.target_method,
|
79
97
|
options=options,
|
80
98
|
)
|
99
|
+
input_list = [
|
100
|
+
ModelMethod._get_method_arg_from_feature(ft, case_sensitive=self.options.get("case_sensitive", False))
|
101
|
+
for ft in self.model_meta.signatures[self.target_method].inputs
|
102
|
+
]
|
103
|
+
input_name_counter = collections.Counter([input_info["name"] for input_info in input_list])
|
104
|
+
dup_input_names = [k for k, v in input_name_counter.items() if v > 1]
|
105
|
+
if dup_input_names:
|
106
|
+
raise ValueError(
|
107
|
+
f"Found duplicate input feature named resolved as {', '.join(dup_input_names)} in the method"
|
108
|
+
f" {self.target_method} This might because you have methods with same letters but different cases. "
|
109
|
+
"In this case, set case_sensitive as True for those methods to distinguish them."
|
110
|
+
)
|
111
|
+
|
81
112
|
return model_manifest_schema.ModelFunctionMethodDict(
|
82
|
-
name=self.method_name.
|
113
|
+
name=self.method_name.resolved(),
|
83
114
|
runtime=self.runtime_name,
|
84
115
|
type="FUNCTION",
|
85
116
|
handler=".".join(
|
86
117
|
[ModelMethod.FUNCTIONS_DIR_REL_PATH, self.target_method, self.function_generator.FUNCTION_NAME]
|
87
118
|
),
|
88
|
-
inputs=
|
119
|
+
inputs=input_list,
|
89
120
|
outputs=[model_manifest_schema.ModelMethodSignatureField(type="OBJECT")],
|
90
121
|
)
|
@@ -44,7 +44,7 @@ class ModelRuntime:
|
|
44
44
|
if self.runtime_env._snowpark_ml_version.local:
|
45
45
|
self.embed_local_ml_library = True
|
46
46
|
else:
|
47
|
-
snowml_server_availability = env_utils.
|
47
|
+
snowml_server_availability = env_utils.validate_requirements_in_information_schema(
|
48
48
|
session=session,
|
49
49
|
reqs=[requirements.Requirement(snowml_pkg_spec)],
|
50
50
|
python_version=snowml_env.PYTHON_VERSION,
|
@@ -59,7 +59,7 @@ def get_requirements_from_task(task: str, spcs_only: bool = False) -> List[model
|
|
59
59
|
return (
|
60
60
|
[model_env.ModelDependency(requirement="tokenizers>=0.13.3", pip_name="tokenizers")]
|
61
61
|
if spcs_only
|
62
|
-
else [model_env.ModelDependency(requirement="tokenizers", pip_name="tokenizers")]
|
62
|
+
else [model_env.ModelDependency(requirement="tokenizers<=0.13.2", pip_name="tokenizers")]
|
63
63
|
)
|
64
64
|
|
65
65
|
return []
|
@@ -170,6 +170,7 @@ class HuggingFacePipelineHandler(
|
|
170
170
|
" `snowflake.ml.model.models.huggingface_pipeline.HuggingFacePipelineModel` object. "
|
171
171
|
"Please make sure you are providing correct model signatures.",
|
172
172
|
UserWarning,
|
173
|
+
stacklevel=2,
|
173
174
|
)
|
174
175
|
else:
|
175
176
|
handlers_utils.validate_target_methods(model, target_methods)
|
@@ -179,6 +180,7 @@ class HuggingFacePipelineHandler(
|
|
179
180
|
+ "Model signature will automatically be inferred from pipeline task. "
|
180
181
|
+ "Or, you could specify model signature manually.",
|
181
182
|
UserWarning,
|
183
|
+
stacklevel=2,
|
182
184
|
)
|
183
185
|
if inferred_pipe_sig is None:
|
184
186
|
raise NotImplementedError(f"Cannot auto infer the signature of pipeline for task {task}")
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import os
|
2
|
+
import warnings
|
2
3
|
from typing import TYPE_CHECKING, Callable, Dict, Optional, Type, cast, final
|
3
4
|
|
4
5
|
import cloudpickle
|
@@ -9,7 +10,7 @@ from typing_extensions import TypeGuard, Unpack
|
|
9
10
|
from snowflake.ml._internal import type_utils
|
10
11
|
from snowflake.ml.model import custom_model, model_signature, type_hints as model_types
|
11
12
|
from snowflake.ml.model._packager.model_env import model_env
|
12
|
-
from snowflake.ml.model._packager.model_handlers import _base
|
13
|
+
from snowflake.ml.model._packager.model_handlers import _base
|
13
14
|
from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
|
14
15
|
from snowflake.ml.model._packager.model_meta import (
|
15
16
|
model_blob_meta,
|
@@ -78,34 +79,15 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
|
|
78
79
|
# Pipeline is inherited from BaseEstimator, so no need to add one more check
|
79
80
|
|
80
81
|
if not is_sub_model:
|
81
|
-
if
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
target_methods=kwargs.pop("target_methods", None),
|
88
|
-
default_target_methods=cls.DEFAULT_TARGET_METHODS,
|
89
|
-
)
|
90
|
-
|
91
|
-
def get_prediction(
|
92
|
-
target_method_name: str, sample_input: model_types.SupportedLocalDataType
|
93
|
-
) -> model_types.SupportedLocalDataType:
|
94
|
-
if not isinstance(sample_input, (pd.DataFrame,)):
|
95
|
-
sample_input = model_signature._convert_local_data_to_df(sample_input)
|
96
|
-
|
97
|
-
target_method = getattr(model, target_method_name, None)
|
98
|
-
assert callable(target_method)
|
99
|
-
predictions_df = target_method(sample_input)
|
100
|
-
return predictions_df
|
101
|
-
|
102
|
-
model_meta = handlers_utils.validate_signature(
|
103
|
-
model=model,
|
104
|
-
model_meta=model_meta,
|
105
|
-
target_methods=target_methods,
|
106
|
-
sample_input=sample_input,
|
107
|
-
get_prediction_fn=get_prediction,
|
82
|
+
if sample_input is not None or model_meta.signatures:
|
83
|
+
warnings.warn(
|
84
|
+
"Inferring model signature from sample input or providing model signature for Snowpark ML "
|
85
|
+
+ "Modeling model is not required. Model signature will automatically be inferred during fitting. ",
|
86
|
+
UserWarning,
|
87
|
+
stacklevel=2,
|
108
88
|
)
|
89
|
+
assert hasattr(model, "model_signatures"), "Model does not have model signatures as expected."
|
90
|
+
model_meta.signatures = getattr(model, "model_signatures", {})
|
109
91
|
|
110
92
|
model_blob_path = os.path.join(model_blobs_dir_path, name)
|
111
93
|
os.makedirs(model_blob_path, exist_ok=True)
|
@@ -72,20 +72,22 @@ def create_model_metadata(
|
|
72
72
|
"""
|
73
73
|
model_dir_path = os.path.normpath(model_dir_path)
|
74
74
|
embed_local_ml_library = kwargs.pop("embed_local_ml_library", False)
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
75
|
+
legacy_save = kwargs.pop("_legacy_save", False)
|
76
|
+
if embed_local_ml_library:
|
77
|
+
# Use the last one which is loaded first, that is mean, it is loaded from site-packages.
|
78
|
+
# We could make sure that user does not overwrite our library with their code follow the same naming.
|
79
|
+
snowml_path, snowml_start_path = file_utils.get_package_path(_SNOWFLAKE_ML_PKG_NAME, strategy="last")
|
80
|
+
if os.path.isdir(snowml_start_path):
|
81
|
+
path_to_copy = snowml_path
|
82
|
+
# If the package is zip-imported, then the path will be `../path_to_zip.zip/snowflake/ml`
|
83
|
+
# It is not a valid path in fact and we need to get the path to the zip file to verify it.
|
84
|
+
elif os.path.isfile(snowml_start_path):
|
85
|
+
extract_root = tempfile.mkdtemp()
|
86
|
+
with zipfile.ZipFile(os.path.abspath(snowml_start_path), mode="r", compression=zipfile.ZIP_DEFLATED) as zf:
|
87
|
+
zf.extractall(path=extract_root)
|
88
|
+
path_to_copy = os.path.join(extract_root, *(_SNOWFLAKE_ML_PKG_NAME.split(".")))
|
89
|
+
else:
|
90
|
+
raise ValueError("`snowflake.ml` is imported via a way that embedding local ML library is not supported.")
|
89
91
|
|
90
92
|
env = _create_env_for_model_metadata(
|
91
93
|
conda_dependencies=conda_dependencies,
|
@@ -106,10 +108,10 @@ def create_model_metadata(
|
|
106
108
|
)
|
107
109
|
|
108
110
|
code_dir_path = os.path.join(model_dir_path, MODEL_CODE_DIR)
|
109
|
-
if embed_local_ml_library or code_paths:
|
111
|
+
if (embed_local_ml_library and legacy_save) or code_paths:
|
110
112
|
os.makedirs(code_dir_path, exist_ok=True)
|
111
113
|
|
112
|
-
if embed_local_ml_library:
|
114
|
+
if embed_local_ml_library and legacy_save:
|
113
115
|
snowml_path_in_code = os.path.join(code_dir_path, _SNOWFLAKE_PKG_NAME)
|
114
116
|
os.makedirs(snowml_path_in_code, exist_ok=True)
|
115
117
|
file_utils.copy_file_or_tree(path_to_copy, snowml_path_in_code)
|
@@ -51,7 +51,7 @@ class SnowparkDataFrameHandler(base_handler.BaseDataHandler[snowflake.snowpark.D
|
|
51
51
|
data: snowflake.snowpark.DataFrame, role: Literal["input", "output"]
|
52
52
|
) -> Sequence[core.BaseFeatureSpec]:
|
53
53
|
return pandas_handler.PandasDataFrameHandler.infer_signature(
|
54
|
-
SnowparkDataFrameHandler.convert_to_df(data), role=role
|
54
|
+
SnowparkDataFrameHandler.convert_to_df(data.limit(n=1)), role=role
|
55
55
|
)
|
56
56
|
|
57
57
|
@staticmethod
|