snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
snowflake/ml/model/_packager/model_runtime/model_runtime.py
ADDED

@@ -0,0 +1,137 @@
+import copy
+import pathlib
+import warnings
+from typing import List, Literal, Optional
+
+from packaging import requirements
+
+from snowflake.ml._internal import env as snowml_env, env_utils, file_utils
+from snowflake.ml.model._packager.model_env import model_env
+from snowflake.ml.model._packager.model_meta import model_meta_schema
+from snowflake.ml.model._packager.model_runtime import (
+    _snowml_inference_alternative_requirements,
+)
+
+_SNOWML_INFERENCE_ALTERNATIVE_DEPENDENCIES = [
+    str(env_utils.get_package_spec_with_supported_ops_only(requirements.Requirement(r)))
+    for r in _snowml_inference_alternative_requirements.REQUIREMENTS
+]
+
+
+class ModelRuntime:
+    """Class to represent runtime in a model, which controls the runtime and version, imports and dependencies.
+
+    Attributes:
+        runtime_env: ModelEnv object representing the actual environment when deploying. The environment is based on
+            the environment from the packaged model with additional dependencies required to deploy.
+        imports: List of files to be imported in the created functions. At least packed model should be imported.
+            If the required Snowpark ML library is not available in the server-side, we will automatically pack the
+            local version as well as "snowflake-ml-python.zip" and added into the imports.
+    """
+
+    RUNTIME_DIR_REL_PATH = "runtimes"
+
+    def __init__(
+        self,
+        name: str,
+        env: model_env.ModelEnv,
+        imports: Optional[List[pathlib.PurePosixPath]] = None,
+        is_gpu: bool = False,
+        server_availability_source: Literal["snowflake", "conda"] = "snowflake",
+        loading_from_file: bool = False,
+    ) -> None:
+        self.name = name
+        self.runtime_env = copy.deepcopy(env)
+        self.imports = imports or []
+
+        if loading_from_file:
+            return
+
+        snowml_pkg_spec = f"{env_utils.SNOWPARK_ML_PKG_NAME}=={self.runtime_env.snowpark_ml_version}"
+        if self.runtime_env._snowpark_ml_version.local:
+            self.embed_local_ml_library = True
+        else:
+            if server_availability_source == "snowflake":
+                snowml_server_availability = (
+                    len(
+                        env_utils.get_matched_package_versions_in_information_schema_with_active_session(
+                            reqs=[requirements.Requirement(snowml_pkg_spec)],
+                            python_version=snowml_env.PYTHON_VERSION,
+                        ).get(env_utils.SNOWPARK_ML_PKG_NAME, [])
+                    )
+                    >= 1
+                )
+            else:
+                snowml_server_availability = (
+                    len(
+                        env_utils.get_matched_package_versions_in_snowflake_conda_channel(
+                            req=requirements.Requirement(snowml_pkg_spec),
+                            python_version=snowml_env.PYTHON_VERSION,
+                        )
+                    )
+                    >= 1
+                )
+            self.embed_local_ml_library = not snowml_server_availability
+
+        additional_package = (
+            _SNOWML_INFERENCE_ALTERNATIVE_DEPENDENCIES if self.embed_local_ml_library else [snowml_pkg_spec]
+        )
+
+        self.runtime_env.include_if_absent(
+            [
+                model_env.ModelDependency(requirement=dep, pip_name=requirements.Requirement(dep).name)
+                for dep in additional_package
+            ],
+        )
+
+        if is_gpu:
+            self.runtime_env.generate_env_for_cuda()
+
+    @property
+    def runtime_rel_path(self) -> pathlib.PurePosixPath:
+        return pathlib.PurePosixPath(ModelRuntime.RUNTIME_DIR_REL_PATH) / self.name
+
+    def save(self, packager_path: pathlib.Path) -> model_meta_schema.ModelRuntimeDict:
+        runtime_base_path = packager_path / self.runtime_rel_path
+        runtime_base_path.mkdir(parents=True, exist_ok=True)
+
+        if getattr(self, "embed_local_ml_library", False):
+            snowpark_ml_lib_path = runtime_base_path / "snowflake-ml-python.zip"
+            file_utils.zip_python_package(str(snowpark_ml_lib_path), "snowflake.ml")
+            snowpark_ml_lib_rel_path = pathlib.PurePosixPath(snowpark_ml_lib_path.relative_to(packager_path).as_posix())
+            self.imports.append(snowpark_ml_lib_rel_path)
+
+        self.runtime_env.conda_env_rel_path = self.runtime_rel_path / self.runtime_env.conda_env_rel_path
+        self.runtime_env.pip_requirements_rel_path = self.runtime_rel_path / self.runtime_env.pip_requirements_rel_path
+
+        env_dict = self.runtime_env.save_as_dict(packager_path)
+
+        return model_meta_schema.ModelRuntimeDict(
+            imports=list(map(str, self.imports)),
+            dependencies=model_meta_schema.ModelRuntimeDependenciesDict(
+                conda=env_dict["conda"],
+                pip=env_dict["pip"],
+            ),
+        )
+
+    @staticmethod
+    def load(
+        packager_path: pathlib.Path,
+        name: str,
+        meta_env: model_env.ModelEnv,
+        loaded_dict: model_meta_schema.ModelRuntimeDict,
+    ) -> "ModelRuntime":
+        env = model_env.ModelEnv()
+        env.python_version = meta_env.python_version
+        env.cuda_version = meta_env.cuda_version
+        env.snowpark_ml_version = meta_env.snowpark_ml_version
+
+        conda_env_rel_path = pathlib.PurePosixPath(loaded_dict["dependencies"]["conda"])
+        pip_requirements_rel_path = pathlib.PurePosixPath(loaded_dict["dependencies"]["pip"])
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            env.load_from_conda_file(packager_path / conda_env_rel_path)
+            env.load_from_pip_file(packager_path / pip_requirements_rel_path)
+        return ModelRuntime(
+            name=name, env=env, imports=list(map(pathlib.PurePosixPath, loaded_dict["imports"])), loading_from_file=True
+        )
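The heart of the new `ModelRuntime` class is deciding whether to embed the local `snowflake-ml-python` build into the model package. Below is a minimal standalone sketch of that decision, assuming a `server_versions` list that stands in for the information-schema or conda-channel lookup the real code performs; `should_embed_local_ml_library` is a hypothetical name, not the library's API.

```python
# Hypothetical, simplified restatement of ModelRuntime's embedding decision.
from typing import List

from packaging import version


def should_embed_local_ml_library(local_version: str, server_versions: List[str]) -> bool:
    """Embed the local package if it is a local build or unavailable server-side."""
    parsed = version.Version(local_version)
    if parsed.local is not None:  # e.g. "1.5.0+g123abc" can never exist in a channel
        return True
    return parsed not in (version.Version(v) for v in server_versions)


assert should_embed_local_ml_library("1.5.0+g123abc", ["1.5.0"])
assert not should_embed_local_ml_library("1.5.0", ["1.4.0", "1.5.0"])
assert should_embed_local_ml_library("1.5.0", ["1.4.0"])
```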
snowflake/ml/model/type_hints.py
CHANGED
@@ -19,6 +19,8 @@ from snowflake.ml.model import deploy_platforms
 from snowflake.ml.model._signatures import core
 
 if TYPE_CHECKING:
+    import catboost
+    import lightgbm
     import mlflow
     import numpy as np
     import pandas as pd
@@ -33,7 +35,6 @@ if TYPE_CHECKING:
     import snowflake.ml.model.custom_model
     import snowflake.ml.model.models.huggingface_pipeline
     import snowflake.ml.model.models.llm
-    import snowflake.ml.model.models.sentence_transformers
    import snowflake.snowpark
     from snowflake.ml.modeling.framework import base  # noqa: F401
 
@@ -69,6 +70,9 @@ _DataType = TypeVar("_DataType", bound=SupportedDataType)
 CustomModelType = TypeVar("CustomModelType", bound="snowflake.ml.model.custom_model.CustomModel")
 
 SupportedRequireSignatureModelType = Union[
+    "catboost.CatBoost",
+    "lightgbm.LGBMModel",
+    "lightgbm.Booster",
     "snowflake.ml.model.custom_model.CustomModel",
     "sklearn.base.BaseEstimator",
     "sklearn.pipeline.Pipeline",
@@ -85,7 +89,6 @@ SupportedNoSignatureRequirementsModelType = Union[
     "transformers.Pipeline",
     "sentence_transformers.SentenceTransformer",
     "snowflake.ml.model.models.huggingface_pipeline.HuggingFacePipelineModel",
-    "snowflake.ml.model.models.sentence_transformers.SentenceTransformer",
     "snowflake.ml.model.models.llm.LLM",
 ]
 
@@ -98,11 +101,14 @@ Here is all acceptable types of Snowflake native model packaging and its handler
 
 | Type | Handler File | Handler |
 |---------------------------------|--------------|---------------------|
+| catboost.CatBoost | catboost.py | _CatBoostModelHandler |
 | snowflake.ml.model.custom_model.CustomModel | custom.py | _CustomModelHandler |
 | sklearn.base.BaseEstimator | sklearn.py | _SKLModelHandler |
 | sklearn.pipeline.Pipeline | sklearn.py | _SKLModelHandler |
 | xgboost.XGBModel | xgboost.py | _XGBModelHandler |
 | xgboost.Booster | xgboost.py | _XGBModelHandler |
+| lightgbm.LGBMModel | lightgbm.py | _LGBMModelHandler |
+| lightgbm.Booster | lightgbm.py | _LGBMModelHandler |
 | snowflake.ml.framework.base.BaseEstimator | snowmlmodel.py | _SnowMLModelHandler |
 | torch.nn.Module | pytroch.py | _PyTorchHandler |
 | torch.jit.ScriptModule | torchscript.py | _TorchScriptHandler |
@@ -114,8 +120,10 @@ Here is all acceptable types of Snowflake native model packaging and its handler
 """
 
 SupportedModelHandlerType = Literal[
+    "catboost",
     "custom",
     "huggingface_pipeline",
+    "lightgbm",
     "mlflow",
     "pytorch",
     "sentence_transformers",
@@ -225,6 +233,11 @@ class BaseModelSaveOption(TypedDict):
     method_options: NotRequired[Dict[str, ModelMethodSaveOptions]]
 
 
+class CatBoostModelSaveOptions(BaseModelSaveOption):
+    target_methods: NotRequired[Sequence[str]]
+    cuda_version: NotRequired[str]
+
+
 class CustomModelSaveOption(BaseModelSaveOption):
     cuda_version: NotRequired[str]
 
@@ -238,6 +251,10 @@ class XGBModelSaveOptions(BaseModelSaveOption):
     cuda_version: NotRequired[str]
 
 
+class LGBMModelSaveOptions(BaseModelSaveOption):
+    target_methods: NotRequired[Sequence[str]]
+
+
 class SNOWModelSaveOptions(BaseModelSaveOption):
     target_methods: NotRequired[Sequence[str]]
 
@@ -279,7 +296,9 @@ class LLMSaveOptions(BaseModelSaveOption):
 
 ModelSaveOption = Union[
     BaseModelSaveOption,
+    CatBoostModelSaveOptions,
     CustomModelSaveOption,
+    LGBMModelSaveOptions,
     SKLModelSaveOptions,
     XGBModelSaveOptions,
     SNOWModelSaveOptions,
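The new save-option classes follow the existing `TypedDict`/`NotRequired` pattern, where every option key is optional and a type checker validates whichever subset a caller passes. A small sketch of how that pattern behaves (the example consumer code is illustrative, not part of the package; `typing.NotRequired` can replace `typing_extensions` on Python 3.11+):

```python
from typing import Sequence

from typing_extensions import NotRequired, TypedDict


class BaseModelSaveOption(TypedDict):
    embed_local_ml_library: NotRequired[bool]


class LGBMModelSaveOptions(BaseModelSaveOption):
    target_methods: NotRequired[Sequence[str]]


# Both an empty dict and any partial dict type-check against the TypedDict:
opts: LGBMModelSaveOptions = {"target_methods": ["predict"]}
print(opts.get("embed_local_ml_library", False))  # -> False
```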
snowflake/ml/modeling/_internal/estimator_utils.py
CHANGED

@@ -195,21 +195,26 @@ def handle_inference_result(
         shape = transformed_numpy_array.shape
         if len(shape) > 1:
             if shape[1] != len(output_cols):
-                # HeterogeneousEnsemble's transform method produce results with varying shapes
-                # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes).
-                # It is hard to predict the response shape without using fragile introspection logic.
-                # So, to avoid that we are packing the results into a dataframe of shape (n_samples, 1) with
-                # each element being a list.
-                if len(output_cols) != 1:
-                    raise TypeError(
-                        "expected_output_cols must be same length as transformed array or should be of length 1."
-                        f"Currently expected_output_cols shape is {len(output_cols)}, "
-                        f"transformed array shape is {shape}. "
-                    )
+                # Within UDF, it is not feasible to change the output cols because we need to
+                # query the output cols after UDF by the expected output cols
                 if not within_udf:
+                    # The following lines are to generate the output cols to match the length of
+                    # transformed_numpy_array
                     actual_output_cols = []
                     for i in range(shape[1]):
                         actual_output_cols.append(f"{output_cols[0]}_{i}")
                     output_cols = actual_output_cols
+                else:
+                    # HeterogeneousEnsemble's transform method produce results with varying shapes
+                    # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes).
+                    # It is hard to predict the response shape without using fragile introspection logic.
+                    # So, to avoid that we are packing the results into a dataframe of shape (n_samples, 1) with
+                    # each element being a list.
+                    if len(output_cols) != 1:
+                        raise TypeError(
+                            "expected_output_cols must be same length as transformed array or should be of length 1."
+                            f"Currently expected_output_cols shape is {len(output_cols)}, "
+                            f"transformed array shape is {shape}. "
+                        )
 
     return transformed_numpy_array, output_cols
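A standalone illustration of the renaming branch added above: outside a UDF, when the estimator returns more columns than `expected_output_cols`, the single expected name is expanded into one suffixed name per column (the array values here are invented):

```python
import numpy as np

transformed_numpy_array = np.ones((4, 3))  # e.g. predict_proba over 3 classes
output_cols = ["PREDICT_PROBA"]

if transformed_numpy_array.shape[1] != len(output_cols):
    # mirror of the not-within_udf branch: expand the single expected name
    output_cols = [f"{output_cols[0]}_{i}" for i in range(transformed_numpy_array.shape[1])]

print(output_cols)  # ['PREDICT_PROBA_0', 'PREDICT_PROBA_1', 'PREDICT_PROBA_2']
```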
snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py
CHANGED

@@ -99,7 +99,10 @@ class PandasTransformHandlers:
                 original_exception=ValueError(
                     "The feature names should match with those that were passed during fit.\n"
                     f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features
+                    f"Features specified with `input_cols` in estimator "
+                    f"{self.estimator.__class__.__name__} in the input dataframe: {input_cols}\n"
+                    f"In your input dataset for current method '{inference_method}', the features are:"
+                    f" {features_in_dataset}."
                 ),
             )
         input_df = dataset[columns_to_select]
snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py
CHANGED

@@ -3,6 +3,8 @@ from typing import List, Optional, Tuple
 
 import pandas as pd
 
+from snowflake.ml.modeling._internal.estimator_utils import handle_inference_result
+
 
 class PandasModelTrainer:
     """
@@ -72,11 +74,61 @@ class PandasModelTrainer:
             Tuple[pd.DataFrame, object]: [predicted dataset, estimator]
         """
         assert hasattr(self.estimator, "fit_predict")  # make type checker happy
-
-        result = self.estimator.fit_predict(**args)
+        result = self.estimator.fit_predict(X=self.dataset[self.input_cols])
         result_df = pd.DataFrame(data=result, columns=expected_output_cols_list)
         if drop_input_cols:
             result_df = result_df
         else:
-
+            # in case the output column name overlap with the input column names,
+            # remove the ones in input column names
+            remove_dataset_col_name_exist_in_output_col = list(
+                set(self.dataset.columns) - set(expected_output_cols_list)
+            )
+            result_df = pd.concat([self.dataset[remove_dataset_col_name_exist_in_output_col], result_df], axis=1)
+        return (result_df, self.estimator)
+
+    def train_fit_transform(
+        self,
+        expected_output_cols_list: List[str],
+        drop_input_cols: Optional[bool] = False,
+    ) -> Tuple[pd.DataFrame, object]:
+        """Trains the model using specified features and target columns from the dataset.
+        This API is different from fit itself because it would also provide the transform
+        output.
+
+        Args:
+            expected_output_cols_list (List[str]): The output columns
+                name as a list. Defaults to None.
+            drop_input_cols (Optional[bool]): Boolean to determine whether to
+                drop the input columns from the output dataset.
+
+        Returns:
+            Tuple[pd.DataFrame, object]: [transformed dataset, estimator]
+        """
+        assert hasattr(self.estimator, "fit")  # make type checker happy
+        assert hasattr(self.estimator, "fit_transform")  # make type checker happy
+
+        argspec = inspect.getfullargspec(self.estimator.fit)
+        args = {"X": self.dataset[self.input_cols]}
+        if self.label_cols:
+            label_arg_name = "Y" if "Y" in argspec.args else "y"
+            args[label_arg_name] = self.dataset[self.label_cols].squeeze()
+
+        if self.sample_weight_col is not None and "sample_weight" in argspec.args:
+            args["sample_weight"] = self.dataset[self.sample_weight_col].squeeze()
+
+        inference_res = self.estimator.fit_transform(**args)
+
+        transformed_numpy_array, output_cols = handle_inference_result(
+            inference_res=inference_res, output_cols=expected_output_cols_list, inference_method="fit_transform"
+        )
+
+        result_df = pd.DataFrame(data=transformed_numpy_array, columns=output_cols)
+        if drop_input_cols:
+            result_df = result_df
+        else:
+            # in case the output column name overlap with the input column names,
+            # remove the ones in input column names
+            remove_dataset_col_name_exist_in_output_col = list(set(self.dataset.columns) - set(output_cols))
+            result_df = pd.concat([self.dataset[remove_dataset_col_name_exist_in_output_col], result_df], axis=1)
         return (result_df, self.estimator)
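Both the fit-predict path and the new `train_fit_transform` end with the same concat-with-dedup step. A minimal reproduction (toy data) showing that an output column shadows a same-named input column instead of being duplicated:

```python
import pandas as pd

dataset = pd.DataFrame({"A": [1, 2], "OUTPUT": [0, 0]})
result_df = pd.DataFrame({"OUTPUT": [9, 9]})

# drop input columns whose names collide with output columns before concatenating
keep = list(set(dataset.columns) - set(result_df.columns))
combined = pd.concat([dataset[keep], result_df], axis=1)
print(sorted(combined.columns))  # ['A', 'OUTPUT'] -- no duplicate OUTPUT column
```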
snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py
CHANGED

@@ -72,24 +72,40 @@ class MLRuntimeTransformHandlers:
 
         """
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        mlrs_inference_methods = ["predict", "predict_proba", "predict_log_proba"]
+
+        if inference_method in mlrs_inference_methods:
+            result_df = self.client.inference(
+                estimator=self.estimator,
+                dataset=self.dataset,
+                inference_method=inference_method,
+                input_cols=input_cols,
+                output_cols=expected_output_cols,
+                drop_input_cols=drop_input_cols,
+            )
+
+        else:
+            handler = SnowparkTransformHandlers(
+                dataset=self.dataset,
+                estimator=self.estimator,
+                class_name=self._class_name,
+                subproject=self._subproject,
+                autogenerated=self._autogenerated,
+            )
+            result_df = handler.batch_inference(
+                inference_method,
+                input_cols,
+                expected_output_cols,
+                session,
+                dependencies,
+                drop_input_cols,
+                expected_output_cols_type,
+                *args,
+                **kwargs,
+            )
+
+        assert isinstance(result_df, DataFrame)  # mypy - The MLRS return types are annotated as `object`.
+        return result_df
 
     def score(
         self,
snowflake/ml/modeling/_internal/model_trainer.py
CHANGED

@@ -22,3 +22,10 @@ class ModelTrainer(Protocol):
         drop_input_cols: Optional[bool] = False,
     ) -> Tuple[Union[DataFrame, pd.DataFrame], object]:
         raise NotImplementedError
+
+    def train_fit_transform(
+        self,
+        expected_output_cols_list: List[str],
+        drop_input_cols: Optional[bool] = False,
+    ) -> Tuple[Union[DataFrame, pd.DataFrame], object]:
+        raise NotImplementedError
snowflake/ml/modeling/_internal/model_trainer_builder.py
CHANGED

@@ -138,21 +138,13 @@ class ModelTrainerBuilder:
         cls,
         estimator: object,
         dataset: Union[DataFrame, pd.DataFrame],
-        input_cols: Optional[List[str]] = None,
+        input_cols: List[str],
         autogenerated: bool = False,
         subproject: str = "",
     ) -> ModelTrainer:
         """
         Builder method that creates an appropriate ModelTrainer instance based on the given params.
         """
-        if input_cols is None:
-            raise exceptions.SnowflakeMLException(
-                error_code=error_codes.NOT_FOUND,
-                original_exception=ValueError(
-                    "The input column names (input_cols) is None.\n"
-                    "Please put your input_cols when initializing the estimator\n"
-                ),
-            )
         if isinstance(dataset, pd.DataFrame):
             return PandasModelTrainer(
                 estimator=estimator,
@@ -179,3 +171,44 @@ class ModelTrainerBuilder:
                 f"Unexpected dataset type: {type(dataset)}."
                 "Supported dataset types: snowpark.DataFrame, pandas.DataFrame."
             )
+
+    @classmethod
+    def build_fit_transform(
+        cls,
+        estimator: object,
+        dataset: Union[DataFrame, pd.DataFrame],
+        input_cols: List[str],
+        label_cols: Optional[List[str]] = None,
+        sample_weight_col: Optional[str] = None,
+        autogenerated: bool = False,
+        subproject: str = "",
+    ) -> ModelTrainer:
+        """
+        Builder method that creates an appropriate ModelTrainer instance based on the given params.
+        """
+        if isinstance(dataset, pd.DataFrame):
+            return PandasModelTrainer(
+                estimator=estimator,
+                dataset=dataset,
+                input_cols=input_cols,
+                label_cols=label_cols,
+                sample_weight_col=sample_weight_col,
+            )
+        elif isinstance(dataset, DataFrame):
+            trainer_klass = SnowparkModelTrainer
+            init_args = {
+                "estimator": estimator,
+                "dataset": dataset,
+                "session": dataset._session,
+                "input_cols": input_cols,
+                "label_cols": label_cols,
+                "sample_weight_col": sample_weight_col,
+                "autogenerated": autogenerated,
+                "subproject": subproject,
+            }
+            return trainer_klass(**init_args)  # type: ignore[arg-type]
+        else:
+            raise TypeError(
+                f"Unexpected dataset type: {type(dataset)}."
+                "Supported dataset types: snowpark.DataFrame, pandas.DataFrame."
+            )
snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py
CHANGED

@@ -955,22 +955,21 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
                 X, y, indices, params_to_evaluate, base_estimator, fit_and_score_kwargs = _load_data_into_udf()
                 self.X = X
                 self.y = y
-                self.indices = indices
+                self.test_indices = indices
                 self.params_to_evaluate = params_to_evaluate
                 self.base_estimator = base_estimator
                 self.fit_and_score_kwargs = fit_and_score_kwargs
                 self.fit_score_params: List[Any] = []
+                self.cached_train_test_indices = []
+                # Calculate the full index here to avoid duplicate calculation (which consumes a lot of memory)
+                full_index = np.arange(DATA_LENGTH)
+                for i in range(n_splits):
+                    self.cached_train_test_indices.extend(
+                        [[np.setdiff1d(full_index, self.test_indices[i]), self.test_indices[i]]]
+                    )
 
             def process(self, idx: int, params_idx: int, cv_idx: int) -> None:
-
-                parameters = self.params_to_evaluate[params_idx]
-                # 2. Calculate the cross validator indices
-                # cross validator's indices: we stored test indices only (to save space);
-                # use the full index to re-construct each train index back.
-                full_index = np.array([i for i in range(DATA_LENGTH)])
-                test_index = self.indices[cv_idx]
-                train_index = np.setdiff1d(full_index, test_index)
-                self.fit_score_params.extend([[idx, (params_idx, parameters), (cv_idx, (train_index, test_index))]])
+                self.fit_score_params.extend([[idx, params_idx, cv_idx]])
 
             def end_partition(self) -> Iterator[Tuple[int, str]]:
                 from sklearn.base import clone
@@ -984,14 +983,14 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
                         clone(self.base_estimator),
                         self.X,
                         self.y,
-                        train=train_index,
-                        test=test_index,
-                        parameters=parameters,
+                        train=self.cached_train_test_indices[split_idx][0],
+                        test=self.cached_train_test_indices[split_idx][1],
+                        parameters=self.params_to_evaluate[cand_idx],
                         split_progress=(split_idx, n_splits),
                         candidate_progress=(cand_idx, n_candidates),
                         **self.fit_and_score_kwargs,  # load sample weight here
                     )
-                    for _, (cand_idx, parameters), (split_idx, (train_index, test_index)) in self.fit_score_params
+                    for _, cand_idx, split_idx in self.fit_score_params
                 )
 
                 binary_cv_results = None
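The memory optimization in this hunk stores only each fold's test indices and reconstructs every train index once, up front, instead of on every `process` call. A standalone demo of the `np.setdiff1d` reconstruction (fold data invented):

```python
import numpy as np

DATA_LENGTH = 10
test_indices = [np.array([0, 1, 2]), np.array([3, 4, 5])]  # per-fold test rows
n_splits = len(test_indices)

full_index = np.arange(DATA_LENGTH)
cached_train_test_indices = [
    [np.setdiff1d(full_index, test_indices[i]), test_indices[i]] for i in range(n_splits)
]

print(cached_train_test_indices[0][0])  # fold 0 train rows: [3 4 5 6 7 8 9]
```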
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py
CHANGED

@@ -9,7 +9,11 @@ import cloudpickle as cp
 import pandas as pd
 
 from snowflake.ml._internal import telemetry
-from snowflake.ml._internal.utils import
+from snowflake.ml._internal.utils import (
+    identifier,
+    pkg_version_utils,
+    snowpark_dataframe_utils,
+)
 from snowflake.ml._internal.utils.query_result_checker import SqlResultValidator
 from snowflake.ml._internal.utils.temp_file_utils import (
     cleanup_temp_files,
@@ -91,6 +95,7 @@ class SnowparkTransformHandlers:
             A new dataset of the same type as the input dataset.
         """
 
+        dependencies = self._get_validated_snowpark_dependencies(session, dependencies)
         dataset = self.dataset
         estimator = self.estimator
         # Register vectorized UDF for batch inference
@@ -136,7 +141,7 @@ class SnowparkTransformHandlers:
             estimator.n_jobs = 1
             inference_res = getattr(estimator, inference_method)(input_df, *args, **kwargs)
 
-            transformed_numpy_array, output_cols = handle_inference_result(
+            transformed_numpy_array, _ = handle_inference_result(
                inference_res=inference_res,
                 output_cols=expected_output_cols,
                 inference_method=inference_method,
@@ -144,13 +149,13 @@ class SnowparkTransformHandlers:
             )
 
             if len(transformed_numpy_array.shape) > 1:
-                if transformed_numpy_array.shape[1] != len(output_cols):
+                if transformed_numpy_array.shape[1] != len(expected_output_cols):
                     series = pd.Series(transformed_numpy_array.tolist())
-                    transformed_pandas_df = pd.DataFrame(series, columns=output_cols)
+                    transformed_pandas_df = pd.DataFrame(series, columns=expected_output_cols)
                 else:
-                    transformed_pandas_df = pd.DataFrame(transformed_numpy_array.tolist(), columns=output_cols)
+                    transformed_pandas_df = pd.DataFrame(transformed_numpy_array.tolist(), columns=expected_output_cols)
             else:
-                transformed_pandas_df = pd.DataFrame(transformed_numpy_array, columns=output_cols)
+                transformed_pandas_df = pd.DataFrame(transformed_numpy_array, columns=expected_output_cols)
 
             return transformed_pandas_df.to_dict("records")  # type: ignore[no-any-return]
 
@@ -210,7 +215,8 @@ class SnowparkTransformHandlers:
         Returns:
             An accuracy score for the model on the given test data.
         """
-
+        dependencies = self._get_validated_snowpark_dependencies(session, dependencies)
+        dependencies.append("snowflake-snowpark-python")
         dataset = self.dataset
         estimator = self.estimator
         dataset = snowpark_dataframe_utils.cast_snowpark_dataframe_column_types(dataset)
@@ -335,3 +341,19 @@ class SnowparkTransformHandlers:
             cleanup_temp_files([local_score_file_name])
 
         return score
+
+    def _get_validated_snowpark_dependencies(self, session: Session, dependencies: List[str]) -> List[str]:
+        """A helper function to validate dependencies and return the available packages that exists
+        in the snowflake anaconda channel
+
+        Args:
+            session: the active snowpark Session
+            dependencies: unvalidated dependencies
+
+        Returns:
+            A list of packages present in the snoflake conda channel.
+        """
+
+        return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
+            pkg_versions=dependencies, session=session, subproject=self._subproject
+        )