snowflake-ml-python 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +66 -31
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +408 -282
- snowflake/ml/feature_store/feature_view.py +37 -8
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +0 -4
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +1 -4
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +135 -0
- snowflake/ml/model/_client/ops/model_ops.py +137 -67
- snowflake/ml/model/_client/sql/model.py +16 -14
- snowflake/ml/model/_client/sql/model_version.py +109 -1
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +22 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +11 -0
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -5
- snowflake/ml/model/_packager/model_packager.py +0 -3
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +24 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -52
- snowflake/ml/modeling/cluster/affinity_propagation.py +51 -52
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -52
- snowflake/ml/modeling/cluster/birch.py +53 -52
- snowflake/ml/modeling/cluster/bisecting_k_means.py +53 -52
- snowflake/ml/modeling/cluster/dbscan.py +51 -52
- snowflake/ml/modeling/cluster/feature_agglomeration.py +53 -52
- snowflake/ml/modeling/cluster/k_means.py +53 -52
- snowflake/ml/modeling/cluster/mean_shift.py +51 -52
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +53 -52
- snowflake/ml/modeling/cluster/optics.py +51 -52
- snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -52
- snowflake/ml/modeling/cluster/spectral_clustering.py +51 -52
- snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -52
- snowflake/ml/modeling/compose/column_transformer.py +53 -52
- snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -52
- snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -52
- snowflake/ml/modeling/covariance/empirical_covariance.py +51 -52
- snowflake/ml/modeling/covariance/graphical_lasso.py +51 -52
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -52
- snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -52
- snowflake/ml/modeling/covariance/min_cov_det.py +51 -52
- snowflake/ml/modeling/covariance/oas.py +51 -52
- snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -52
- snowflake/ml/modeling/decomposition/dictionary_learning.py +53 -52
- snowflake/ml/modeling/decomposition/factor_analysis.py +53 -52
- snowflake/ml/modeling/decomposition/fast_ica.py +53 -52
- snowflake/ml/modeling/decomposition/incremental_pca.py +53 -52
- snowflake/ml/modeling/decomposition/kernel_pca.py +53 -52
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +53 -52
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +53 -52
- snowflake/ml/modeling/decomposition/pca.py +53 -52
- snowflake/ml/modeling/decomposition/sparse_pca.py +53 -52
- snowflake/ml/modeling/decomposition/truncated_svd.py +53 -52
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +53 -52
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -52
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/isolation_forest.py +51 -52
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -52
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -52
- snowflake/ml/modeling/ensemble/stacking_regressor.py +53 -52
- snowflake/ml/modeling/ensemble/voting_classifier.py +53 -52
- snowflake/ml/modeling/ensemble/voting_regressor.py +53 -52
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +53 -52
- snowflake/ml/modeling/feature_selection/select_fdr.py +53 -52
- snowflake/ml/modeling/feature_selection/select_fpr.py +53 -52
- snowflake/ml/modeling/feature_selection/select_fwe.py +53 -52
- snowflake/ml/modeling/feature_selection/select_k_best.py +53 -52
- snowflake/ml/modeling/feature_selection/select_percentile.py +53 -52
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +53 -52
- snowflake/ml/modeling/feature_selection/variance_threshold.py +53 -52
- snowflake/ml/modeling/framework/base.py +63 -36
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -52
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -52
- snowflake/ml/modeling/impute/iterative_imputer.py +53 -52
- snowflake/ml/modeling/impute/knn_imputer.py +53 -52
- snowflake/ml/modeling/impute/missing_indicator.py +53 -52
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +53 -52
- snowflake/ml/modeling/kernel_approximation/nystroem.py +53 -52
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +53 -52
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +53 -52
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +53 -52
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -52
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -52
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/ard_regression.py +51 -52
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -52
- snowflake/ml/modeling/linear_model/elastic_net.py +51 -52
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -52
- snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/huber_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/lars.py +51 -52
- snowflake/ml/modeling/linear_model/lars_cv.py +51 -52
- snowflake/ml/modeling/linear_model/lasso.py +51 -52
- snowflake/ml/modeling/linear_model/lasso_cv.py +51 -52
- snowflake/ml/modeling/linear_model/lasso_lars.py +51 -52
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -52
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -52
- snowflake/ml/modeling/linear_model/linear_regression.py +51 -52
- snowflake/ml/modeling/linear_model/logistic_regression.py +51 -52
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -52
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -52
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -52
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -52
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -52
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -52
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -52
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/perceptron.py +51 -52
- snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/ridge.py +51 -52
- snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -52
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -52
- snowflake/ml/modeling/linear_model/ridge_cv.py +51 -52
- snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -52
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -52
- snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -52
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -52
- snowflake/ml/modeling/manifold/isomap.py +53 -52
- snowflake/ml/modeling/manifold/mds.py +53 -52
- snowflake/ml/modeling/manifold/spectral_embedding.py +53 -52
- snowflake/ml/modeling/manifold/tsne.py +53 -52
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -52
- snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -52
- snowflake/ml/modeling/model_selection/grid_search_cv.py +21 -23
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +38 -20
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -52
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -52
- snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -52
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -52
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -52
- snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -52
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -52
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -52
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -52
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -52
- snowflake/ml/modeling/neighbors/kernel_density.py +51 -52
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -52
- snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -52
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -52
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +53 -52
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -52
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -52
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +53 -52
- snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -52
- snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -52
- snowflake/ml/modeling/pipeline/pipeline.py +514 -32
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +12 -0
- snowflake/ml/modeling/preprocessing/polynomial_features.py +53 -52
- snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -52
- snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -52
- snowflake/ml/modeling/svm/linear_svc.py +51 -52
- snowflake/ml/modeling/svm/linear_svr.py +51 -52
- snowflake/ml/modeling/svm/nu_svc.py +51 -52
- snowflake/ml/modeling/svm/nu_svr.py +51 -52
- snowflake/ml/modeling/svm/svc.py +51 -52
- snowflake/ml/modeling/svm/svr.py +51 -52
- snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -52
- snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -52
- snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -52
- snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -52
- snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -52
- snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -52
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -52
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -52
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +63 -2
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/RECORD +204 -196
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.1.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
snowflake/ml/dataset/dataset.py
CHANGED
@@ -1,161 +1,486 @@
|
|
1
1
|
import json
|
2
|
-
import
|
3
|
-
from
|
4
|
-
from typing import Any, Dict, List, Optional
|
2
|
+
import warnings
|
3
|
+
from datetime import datetime
|
4
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
5
5
|
|
6
|
-
from snowflake
|
7
|
-
from snowflake.
|
6
|
+
from snowflake import snowpark
|
7
|
+
from snowflake.ml._internal import telemetry
|
8
|
+
from snowflake.ml._internal.exceptions import (
|
9
|
+
dataset_error_messages,
|
10
|
+
dataset_errors,
|
11
|
+
error_codes,
|
12
|
+
exceptions as snowml_exceptions,
|
13
|
+
)
|
14
|
+
from snowflake.ml._internal.lineage import data_source
|
15
|
+
from snowflake.ml._internal.utils import (
|
16
|
+
formatting,
|
17
|
+
identifier,
|
18
|
+
query_result_checker,
|
19
|
+
snowpark_dataframe_utils,
|
20
|
+
)
|
21
|
+
from snowflake.ml.dataset import dataset_metadata, dataset_reader
|
22
|
+
from snowflake.snowpark import exceptions as snowpark_exceptions, functions
|
8
23
|
|
24
|
+
_PROJECT = "Dataset"
|
25
|
+
_TELEMETRY_STATEMENT_PARAMS = telemetry.get_function_usage_statement_params(_PROJECT)
|
26
|
+
_METADATA_MAX_QUERY_LENGTH = 10000
|
27
|
+
_DATASET_VERSION_NAME_COL = "version"
|
9
28
|
|
10
|
-
def _get_val_or_null(val: Any) -> Any:
|
11
|
-
return val if val is not None else "null"
|
12
29
|
|
30
|
+
class DatasetVersion:
|
31
|
+
"""Represents a version of a Snowflake Dataset"""
|
13
32
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
33
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
34
|
+
def __init__(
|
35
|
+
self,
|
36
|
+
dataset: "Dataset",
|
37
|
+
version: str,
|
38
|
+
) -> None:
|
39
|
+
"""Initialize a DatasetVersion object.
|
18
40
|
|
41
|
+
Args:
|
42
|
+
dataset: The parent Snowflake Dataset.
|
43
|
+
version: Dataset version name.
|
44
|
+
"""
|
45
|
+
self._parent = dataset
|
46
|
+
self._version = version
|
47
|
+
self._session: snowpark.Session = self._parent._session
|
19
48
|
|
20
|
-
|
49
|
+
self._properties: Optional[Dict[str, Any]] = None
|
50
|
+
self._raw_metadata: Optional[Dict[str, Any]] = None
|
51
|
+
self._metadata: Optional[dataset_metadata.DatasetMetadata] = None
|
21
52
|
|
53
|
+
@property
|
54
|
+
def name(self) -> str:
|
55
|
+
return self._version
|
22
56
|
|
23
|
-
@
|
24
|
-
|
25
|
-
|
26
|
-
|
57
|
+
@property
|
58
|
+
def created_on(self) -> datetime:
|
59
|
+
timestamp = self._get_property("created_on")
|
60
|
+
assert isinstance(timestamp, datetime)
|
61
|
+
return timestamp
|
27
62
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
63
|
+
@property
|
64
|
+
def comment(self) -> Optional[str]:
|
65
|
+
comment: Optional[str] = self._get_property("comment")
|
66
|
+
return comment
|
32
67
|
|
33
|
-
|
68
|
+
def _get_property(self, property_name: str, default: Any = None) -> Any:
|
69
|
+
if self._properties is None:
|
70
|
+
sql_result = (
|
71
|
+
query_result_checker.SqlResultValidator(
|
72
|
+
self._session,
|
73
|
+
f"SHOW VERSIONS LIKE '{self._version}' IN DATASET {self._parent.fully_qualified_name}",
|
74
|
+
statement_params=_TELEMETRY_STATEMENT_PARAMS,
|
75
|
+
)
|
76
|
+
.has_dimensions(expected_rows=1)
|
77
|
+
.validate()
|
78
|
+
)
|
79
|
+
self._properties = sql_result[0].as_dict(True)
|
80
|
+
return self._properties.get(property_name, default)
|
81
|
+
|
82
|
+
def _get_metadata(self) -> Optional[dataset_metadata.DatasetMetadata]:
|
83
|
+
if self._raw_metadata is None:
|
84
|
+
self._raw_metadata = json.loads(self._get_property("metadata", "{}"))
|
85
|
+
try:
|
86
|
+
self._metadata = (
|
87
|
+
dataset_metadata.DatasetMetadata.from_json(self._raw_metadata) if self._raw_metadata else None
|
88
|
+
)
|
89
|
+
except ValueError as e:
|
90
|
+
warnings.warn(f"Metadata parsing failed with error: {e}", UserWarning, stacklevel=2)
|
91
|
+
return self._metadata
|
34
92
|
|
35
|
-
|
36
|
-
|
37
|
-
|
93
|
+
def _get_exclude_cols(self) -> List[str]:
|
94
|
+
metadata = self._get_metadata()
|
95
|
+
if metadata is None:
|
96
|
+
return []
|
97
|
+
cols = []
|
98
|
+
if metadata.exclude_cols:
|
99
|
+
cols.extend(metadata.exclude_cols)
|
100
|
+
if metadata.label_cols:
|
101
|
+
cols.extend(metadata.label_cols)
|
102
|
+
return cols
|
38
103
|
|
39
|
-
def
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
"features": json.dumps(self.features),
|
48
|
-
}
|
49
|
-
return json.dumps(state_dict)
|
104
|
+
def url(self) -> str:
|
105
|
+
"""Returns the URL of the DatasetVersion contents in Snowflake.
|
106
|
+
|
107
|
+
Returns:
|
108
|
+
Snowflake URL string.
|
109
|
+
"""
|
110
|
+
path = f"snow://dataset/{self._parent.fully_qualified_name}/versions/{self._version}/"
|
111
|
+
return path
|
50
112
|
|
51
|
-
@
|
52
|
-
def
|
53
|
-
|
54
|
-
return
|
55
|
-
|
56
|
-
connection_params=json.loads(json_dict["connection_params"]),
|
57
|
-
features=json.loads(json_dict["features"]),
|
113
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
114
|
+
def list_files(self, subdir: Optional[str] = None) -> List[snowpark.Row]:
|
115
|
+
"""Get the list of remote file paths for the current DatasetVersion."""
|
116
|
+
return self._session.sql(f"LIST {self.url()}{subdir or ''}").collect(
|
117
|
+
statement_params=_TELEMETRY_STATEMENT_PARAMS
|
58
118
|
)
|
59
119
|
|
120
|
+
def __repr__(self) -> str:
|
121
|
+
return f"{self.__class__.__name__}(dataset='{self._parent.fully_qualified_name}', version='{self.name}')"
|
60
122
|
|
61
|
-
class Dataset(Artifact):
|
62
|
-
"""Metadata of dataset."""
|
63
123
|
|
124
|
+
class Dataset:
|
125
|
+
"""Represents a Snowflake Dataset which is organized into versions."""
|
126
|
+
|
127
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
64
128
|
def __init__(
|
65
129
|
self,
|
66
|
-
session: Session,
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
timestamp_col: Optional[str] = None,
|
72
|
-
label_cols: Optional[List[str]] = None,
|
73
|
-
feature_store_metadata: Optional[FeatureStoreMetadata] = None,
|
74
|
-
desc: str = "",
|
130
|
+
session: snowpark.Session,
|
131
|
+
database: str,
|
132
|
+
schema: str,
|
133
|
+
name: str,
|
134
|
+
selected_version: Optional[str] = None,
|
75
135
|
) -> None:
|
76
|
-
"""Initialize
|
136
|
+
"""Initialize a lazily evaluated Dataset object"""
|
137
|
+
self._session = session
|
138
|
+
self._db = database
|
139
|
+
self._schema = schema
|
140
|
+
self._name = name
|
141
|
+
self._fully_qualified_name = identifier.get_schema_level_object_identifier(database, schema, name)
|
142
|
+
|
143
|
+
self._version = DatasetVersion(self, selected_version) if selected_version else None
|
144
|
+
self._reader: Optional[dataset_reader.DatasetReader] = None
|
145
|
+
|
146
|
+
@property
|
147
|
+
def fully_qualified_name(self) -> str:
|
148
|
+
return self._fully_qualified_name
|
149
|
+
|
150
|
+
@property
|
151
|
+
def selected_version(self) -> Optional[DatasetVersion]:
|
152
|
+
return self._version
|
153
|
+
|
154
|
+
@property
|
155
|
+
def read(self) -> dataset_reader.DatasetReader:
|
156
|
+
if not self.selected_version:
|
157
|
+
raise snowml_exceptions.SnowflakeMLException(
|
158
|
+
error_code=error_codes.INVALID_ATTRIBUTE,
|
159
|
+
original_exception=RuntimeError("No Dataset version selected."),
|
160
|
+
)
|
161
|
+
if self._reader is None:
|
162
|
+
v = self.selected_version
|
163
|
+
self._reader = dataset_reader.DatasetReader(
|
164
|
+
self._session,
|
165
|
+
[
|
166
|
+
data_source.DataSource(
|
167
|
+
fully_qualified_name=self._fully_qualified_name,
|
168
|
+
version=v.name,
|
169
|
+
url=v.url(),
|
170
|
+
exclude_cols=v._get_exclude_cols(),
|
171
|
+
)
|
172
|
+
],
|
173
|
+
)
|
174
|
+
return self._reader
|
175
|
+
|
176
|
+
@staticmethod
|
177
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
178
|
+
def load(session: snowpark.Session, name: str) -> "Dataset":
|
179
|
+
"""
|
180
|
+
Load an existing Snowflake Dataset. DatasetVersions can be created from the Dataset object
|
181
|
+
using `Dataset.create_version()` and loaded with `Dataset.version()`.
|
77
182
|
|
78
183
|
Args:
|
79
|
-
session:
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
184
|
+
session: Snowpark Session to interact with Snowflake backend.
|
185
|
+
name: Name of dataset to load. May optionally be a schema-level identifier.
|
186
|
+
|
187
|
+
Returns:
|
188
|
+
Dataset object representing loaded dataset
|
189
|
+
|
190
|
+
Raises:
|
191
|
+
ValueError: name is not a valid Snowflake identifier
|
192
|
+
DatasetNotExistError: Specified Dataset does not exist
|
193
|
+
|
194
|
+
# noqa: DAR402
|
89
195
|
"""
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
196
|
+
db, schema, ds_name = _get_schema_level_identifier(session, name)
|
197
|
+
_validate_dataset_exists(session, db, schema, ds_name)
|
198
|
+
return Dataset(session, db, schema, ds_name)
|
199
|
+
|
200
|
+
@staticmethod
|
201
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
202
|
+
def create(session: snowpark.Session, name: str, exist_ok: bool = False) -> "Dataset":
|
203
|
+
"""
|
204
|
+
Create a new Snowflake Dataset. DatasetVersions can created from the Dataset object
|
205
|
+
using `Dataset.create_version()` and loaded with `Dataset.version()`.
|
206
|
+
|
207
|
+
Args:
|
208
|
+
session: Snowpark Session to interact with Snowflake backend.
|
209
|
+
name: Name of dataset to create. May optionally be a schema-level identifier.
|
210
|
+
exist_ok: If False, raises an exception if specified Dataset already exists
|
211
|
+
|
212
|
+
Returns:
|
213
|
+
Dataset object representing created dataset
|
214
|
+
|
215
|
+
Raises:
|
216
|
+
ValueError: name is not a valid Snowflake identifier
|
217
|
+
DatasetExistError: Specified Dataset already exists
|
218
|
+
DatasetError: Dataset creation failed
|
219
|
+
|
220
|
+
# noqa: DAR401
|
221
|
+
# noqa: DAR402
|
222
|
+
"""
|
223
|
+
db, schema, ds_name = _get_schema_level_identifier(session, name)
|
224
|
+
ds_fqn = identifier.get_schema_level_object_identifier(db, schema, ds_name)
|
225
|
+
query = f"CREATE DATASET{' IF NOT EXISTS' if exist_ok else ''} {ds_fqn}"
|
226
|
+
try:
|
227
|
+
session.sql(query).collect(statement_params=_TELEMETRY_STATEMENT_PARAMS)
|
228
|
+
return Dataset(session, db, schema, ds_name)
|
229
|
+
except snowpark_exceptions.SnowparkClientException as e:
|
230
|
+
# Snowpark wraps the Python Connector error code in the head of the error message.
|
231
|
+
if e.message.startswith(dataset_errors.ERRNO_OBJECT_ALREADY_EXISTS):
|
232
|
+
raise snowml_exceptions.SnowflakeMLException(
|
233
|
+
error_code=error_codes.OBJECT_ALREADY_EXISTS,
|
234
|
+
original_exception=dataset_errors.DatasetExistError(
|
235
|
+
dataset_error_messages.DATASET_ALREADY_EXISTS.format(name)
|
236
|
+
),
|
237
|
+
) from e
|
238
|
+
else:
|
239
|
+
raise
|
240
|
+
|
241
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
242
|
+
def list_versions(self, detailed: bool = False) -> Union[List[str], List[snowpark.Row]]:
|
243
|
+
"""Return list of versions"""
|
244
|
+
versions = self._list_versions()
|
245
|
+
versions.sort(key=lambda r: r[_DATASET_VERSION_NAME_COL])
|
246
|
+
if not detailed:
|
247
|
+
return [r[_DATASET_VERSION_NAME_COL] for r in versions]
|
248
|
+
return versions
|
249
|
+
|
250
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
251
|
+
def select_version(self, version: str) -> "Dataset":
|
252
|
+
"""Return a new Dataset instance with the specified version selected.
|
253
|
+
|
254
|
+
Args:
|
255
|
+
version: Dataset version name.
|
256
|
+
|
257
|
+
Returns:
|
258
|
+
Dataset object.
|
259
|
+
"""
|
260
|
+
self._validate_version_exists(version)
|
261
|
+
return Dataset(self._session, self._db, self._schema, self._name, version)
|
262
|
+
|
263
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
264
|
+
def create_version(
|
265
|
+
self,
|
266
|
+
version: str,
|
267
|
+
input_dataframe: snowpark.DataFrame,
|
268
|
+
shuffle: bool = False,
|
269
|
+
exclude_cols: Optional[List[str]] = None,
|
270
|
+
label_cols: Optional[List[str]] = None,
|
271
|
+
properties: Optional[dataset_metadata.DatasetPropertiesType] = None,
|
272
|
+
partition_by: Optional[str] = None,
|
273
|
+
comment: Optional[str] = None,
|
274
|
+
) -> "Dataset":
|
275
|
+
"""Create a new version of the current Dataset.
|
276
|
+
|
277
|
+
The result Dataset object captures the query result deterministically as stage files.
|
278
|
+
|
279
|
+
Args:
|
280
|
+
version: Dataset version name. Data contents are materialized to the Dataset entity.
|
281
|
+
input_dataframe: A Snowpark DataFrame which yields the Dataset contents.
|
282
|
+
shuffle: A boolean represents whether the data should be shuffled globally. Default to be false.
|
283
|
+
exclude_cols: Name of column(s) in dataset to be excluded during training/testing (e.g. timestamp).
|
284
|
+
label_cols: Name of column(s) in dataset that contains labels.
|
285
|
+
properties: Custom metadata properties, saved under `DatasetMetadata.properties`
|
286
|
+
partition_by: Optional partitioning scheme within the new Dataset version.
|
287
|
+
comment: A descriptive comment about this dataset.
|
288
|
+
|
289
|
+
Returns:
|
290
|
+
A Dataset object with the newly created version selected.
|
291
|
+
|
292
|
+
Raises:
|
293
|
+
SnowflakeMLException: The Dataset no longer exists.
|
294
|
+
SnowflakeMLException: The specified Dataset version already exists.
|
295
|
+
snowpark_exceptions.SnowparkClientException: An error occurred during Dataset creation.
|
296
|
+
|
297
|
+
Note: During the generation of stage files, data casting will occur. The casting rules are as follows::
|
298
|
+
- Data casting:
|
299
|
+
- DecimalType(NUMBER):
|
300
|
+
- If its scale is zero, cast to BIGINT
|
301
|
+
- If its scale is non-zero, cast to FLOAT
|
302
|
+
- DoubleType(DOUBLE): Cast to FLOAT.
|
303
|
+
- ByteType(TINYINT): Cast to SMALLINT.
|
304
|
+
- ShortType(SMALLINT):Cast to SMALLINT.
|
305
|
+
- IntegerType(INT): Cast to INT.
|
306
|
+
- LongType(BIGINT): Cast to BIGINT.
|
307
|
+
- No action:
|
308
|
+
- FloatType(FLOAT): No action.
|
309
|
+
- StringType(String): No action.
|
310
|
+
- BinaryType(BINARY): No action.
|
311
|
+
- BooleanType(BOOLEAN): No action.
|
312
|
+
- Not supported:
|
313
|
+
- ArrayType(ARRAY): Not supported. A warning will be logged.
|
314
|
+
- MapType(OBJECT): Not supported. A warning will be logged.
|
315
|
+
- TimestampType(TIMESTAMP): Not supported. A warning will be logged.
|
316
|
+
- TimeType(TIME): Not supported. A warning will be logged.
|
317
|
+
- DateType(DATE): Not supported. A warning will be logged.
|
318
|
+
- VariantType(VARIANT): Not supported. A warning will be logged.
|
319
|
+
"""
|
320
|
+
casted_df = snowpark_dataframe_utils.cast_snowpark_dataframe(input_dataframe)
|
321
|
+
|
322
|
+
if shuffle:
|
323
|
+
casted_df = casted_df.order_by(functions.random())
|
324
|
+
|
325
|
+
source_query = json.dumps(input_dataframe.queries)
|
326
|
+
if len(source_query) > _METADATA_MAX_QUERY_LENGTH:
|
327
|
+
warnings.warn(
|
328
|
+
"Source query exceeded max query length, dropping from metadata (limit=%d, actual=%d)"
|
329
|
+
% (_METADATA_MAX_QUERY_LENGTH, len(source_query)),
|
330
|
+
stacklevel=2,
|
123
331
|
)
|
332
|
+
source_query = "<query too long>"
|
124
333
|
|
125
|
-
|
126
|
-
|
127
|
-
"
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
"feature_store_metadata": _wrap_embedded_str(self.feature_store_metadata.to_json())
|
134
|
-
if self.feature_store_metadata is not None
|
135
|
-
else "null",
|
136
|
-
"schema_version": self.schema_version,
|
137
|
-
"desc": self.desc,
|
138
|
-
}
|
139
|
-
return json.dumps(state_dict)
|
140
|
-
|
141
|
-
@classmethod
|
142
|
-
def from_json(cls, json_str: str, session: Session) -> "Dataset":
|
143
|
-
json_dict = json.loads(json_str, strict=False)
|
144
|
-
json_dict["df"] = session.sql(json_dict.pop("df_query"))
|
145
|
-
|
146
|
-
fs_meta_json = json_dict["feature_store_metadata"]
|
147
|
-
json_dict["feature_store_metadata"] = (
|
148
|
-
FeatureStoreMetadata.from_json(fs_meta_json) if fs_meta_json != "null" else None
|
334
|
+
metadata = dataset_metadata.DatasetMetadata(
|
335
|
+
source_query=source_query,
|
336
|
+
owner=self._session.sql("SELECT CURRENT_USER()").collect(statement_params=_TELEMETRY_STATEMENT_PARAMS)[0][
|
337
|
+
"CURRENT_USER()"
|
338
|
+
],
|
339
|
+
exclude_cols=exclude_cols,
|
340
|
+
label_cols=label_cols,
|
341
|
+
properties=properties,
|
149
342
|
)
|
150
343
|
|
151
|
-
|
152
|
-
|
344
|
+
post_actions = casted_df._plan.post_actions
|
345
|
+
try:
|
346
|
+
# Execute all but the last query, final query gets passed to ALTER DATASET ADD VERSION
|
347
|
+
query = casted_df._plan.queries[-1].sql.strip()
|
348
|
+
if len(casted_df._plan.queries) > 1:
|
349
|
+
casted_df._plan.queries = casted_df._plan.queries[:-1]
|
350
|
+
casted_df._plan.post_actions = []
|
351
|
+
casted_df.collect(statement_params=_TELEMETRY_STATEMENT_PARAMS)
|
352
|
+
sql_command = "ALTER DATASET {} ADD VERSION '{}' FROM ({})".format(
|
353
|
+
self.fully_qualified_name,
|
354
|
+
version,
|
355
|
+
query,
|
356
|
+
)
|
357
|
+
if partition_by:
|
358
|
+
sql_command += f" PARTITION BY {partition_by}"
|
359
|
+
if comment:
|
360
|
+
sql_command += f" COMMENT={formatting.format_value_for_select(comment)}"
|
361
|
+
sql_command += f" METADATA=$${metadata.to_json()}$$"
|
362
|
+
self._session.sql(sql_command).collect(statement_params=_TELEMETRY_STATEMENT_PARAMS)
|
363
|
+
|
364
|
+
return Dataset(self._session, self._db, self._schema, self._name, version)
|
153
365
|
|
154
|
-
|
155
|
-
|
156
|
-
|
366
|
+
except snowpark_exceptions.SnowparkClientException as e:
|
367
|
+
if e.message.startswith(dataset_errors.ERRNO_DATASET_NOT_EXIST):
|
368
|
+
raise snowml_exceptions.SnowflakeMLException(
|
369
|
+
error_code=error_codes.NOT_FOUND,
|
370
|
+
original_exception=dataset_errors.DatasetNotExistError(
|
371
|
+
dataset_error_messages.DATASET_NOT_EXIST.format(self.fully_qualified_name)
|
372
|
+
),
|
373
|
+
) from e
|
374
|
+
elif (
|
375
|
+
e.message.startswith(dataset_errors.ERRNO_DATASET_VERSION_ALREADY_EXISTS)
|
376
|
+
or e.message.startswith(dataset_errors.ERRNO_VERSION_ALREADY_EXISTS)
|
377
|
+
or e.message.startswith(dataset_errors.ERRNO_FILES_ALREADY_EXISTING)
|
378
|
+
):
|
379
|
+
raise snowml_exceptions.SnowflakeMLException(
|
380
|
+
error_code=error_codes.OBJECT_ALREADY_EXISTS,
|
381
|
+
original_exception=dataset_errors.DatasetExistError(
|
382
|
+
dataset_error_messages.DATASET_VERSION_ALREADY_EXISTS.format(self.fully_qualified_name, version)
|
383
|
+
),
|
384
|
+
) from e
|
385
|
+
else:
|
386
|
+
raise
|
387
|
+
finally:
|
388
|
+
for action in post_actions:
|
389
|
+
self._session.sql(action.sql.strip()).collect(statement_params=_TELEMETRY_STATEMENT_PARAMS)
|
157
390
|
|
158
|
-
|
391
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
def delete_version(self, version_name: str) -> None:
    """Drop a single version from this Dataset.

    Args:
        version_name: Name of the version to remove from the Dataset.

    Raises:
        SnowflakeMLException: Raised with error code ``SNOWML_DELETE_FAILED`` when the
            ``DROP VERSION`` statement fails with a Snowpark client exception.
    """
    drop_statement = f"ALTER DATASET {self.fully_qualified_name} DROP VERSION '{version_name}'"
    try:
        self._session.sql(drop_statement).collect(statement_params=_TELEMETRY_STATEMENT_PARAMS)
    except snowpark_exceptions.SnowparkClientException as client_error:
        # Surface the failure as a SnowML exception while keeping the Snowpark error chained.
        wrapped = dataset_errors.DatasetCannotDeleteError(str(client_error))
        raise snowml_exceptions.SnowflakeMLException(
            error_code=error_codes.SNOWML_DELETE_FAILED,
            original_exception=wrapped,
        ) from client_error
|
412
|
+
|
413
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
def delete(self) -> None:
    """Delete this Dataset together with all versions it contains."""
    # TODO: Check and warn if any versions exist
    drop_statement = f"DROP DATASET {self.fully_qualified_name}"
    self._session.sql(drop_statement).collect(statement_params=_TELEMETRY_STATEMENT_PARAMS)
|
420
|
+
|
421
|
+
def _list_versions(self, pattern: Optional[str] = None) -> List[snowpark.Row]:
    """List the versions of this Dataset using ``SHOW VERSIONS``.

    Args:
        pattern: Optional ``LIKE`` pattern applied to the listing. When it is
            ``None`` or empty, no ``LIKE`` clause is added.

    Returns:
        The validated query result; it must expose the version-name column
        and is allowed to be empty.

    Raises:
        SnowflakeMLException: Raised with error code ``NOT_FOUND`` when the Snowpark
            error message starts with the "object does not exist" error number.
    """
    like_clause = "" if not pattern else f" LIKE '{pattern}'"
    try:
        show_query = f"SHOW VERSIONS{like_clause} IN DATASET {self.fully_qualified_name}"
        validator = query_result_checker.SqlResultValidator(
            self._session,
            show_query,
            statement_params=_TELEMETRY_STATEMENT_PARAMS,
        )
        return validator.has_column(_DATASET_VERSION_NAME_COL, allow_empty=True).validate()
    except snowpark_exceptions.SnowparkClientException as client_error:
        # Snowpark wraps the Python Connector error code in the head of the error message.
        if not client_error.message.startswith(dataset_errors.ERRNO_OBJECT_NOT_EXIST):
            raise
        missing = dataset_errors.DatasetNotExistError(
            dataset_error_messages.DATASET_NOT_EXIST.format(self.fully_qualified_name)
        )
        raise snowml_exceptions.SnowflakeMLException(
            error_code=error_codes.NOT_FOUND,
            original_exception=missing,
        ) from client_error
|
445
|
+
|
446
|
+
def _validate_version_exists(self, version: str) -> None:
    """Check that ``version`` exists in this Dataset.

    Args:
        version: Version name to look for.

    Raises:
        SnowflakeMLException: Raised with error code ``NOT_FOUND`` when no listed
            version has exactly this name.
    """
    # The listing is filtered with LIKE first; the equality check below then
    # enforces an exact, case sensitive match on the version name.
    exact_matches = [row for row in self._list_versions(version) if row[_DATASET_VERSION_NAME_COL] == version]
    if exact_matches:
        return
    raise snowml_exceptions.SnowflakeMLException(
        error_code=error_codes.NOT_FOUND,
        original_exception=dataset_errors.DatasetNotExistError(
            dataset_error_messages.DATASET_VERSION_NOT_EXIST.format(self.fully_qualified_name, version)
        ),
    )
|
457
|
+
|
458
|
+
|
459
|
+
# Utility methods
|
460
|
+
|
461
|
+
|
462
|
+
def _get_schema_level_identifier(session: snowpark.Session, dataset_name: str) -> Tuple[str, str, str]:
    """Split a dataset name into its database, schema and object name parts.

    Missing database or schema parts are filled in from the session's current
    database and schema.

    Args:
        session: Snowpark session supplying the current database/schema defaults.
        dataset_name: Dataset identifier, optionally qualified with database and schema.

    Returns:
        A ``(database, schema, object_name)`` tuple of strings.

    Raises:
        ValueError: The identifier has trailing content after the object name.
    """
    db, schema, object_name, others = identifier.parse_schema_level_object_identifier(dataset_name)
    if others:
        raise ValueError(f"Invalid identifier: unexpected '{others}'")
    if not db:
        db = session.get_current_database()
    if not schema:
        schema = session.get_current_schema()
    # NOTE(review): if the session has no current database/schema these would
    # presumably be None and str() would yield the literal "None" - confirm.
    return str(db), str(schema), str(object_name)
|
470
|
+
|
471
|
+
|
472
|
+
def _validate_dataset_exists(session: snowpark.Session, db: str, schema: str, dataset_name: str) -> None:
    """Check that a dataset with the given name exists in ``db.schema``.

    Args:
        session: Snowpark session used to run the lookup query.
        db: Database containing the schema to search.
        schema: Schema to search for the dataset.
        dataset_name: Dataset identifier; it is resolved first, and the surrounding
            double quotes are removed from a quoted identifier before the lookup.

    Raises:
        SnowflakeMLException: Raised with error code ``NOT_FOUND`` when the lookup
            query returns no rows.
    """
    # FIXME: Once we switch version to SQL Identifiers we can just use version check with version=''
    dataset_name = identifier.resolve_identifier(dataset_name)
    is_quoted = dataset_name.startswith('"') and dataset_name.endswith('"')
    if is_quoted:
        dataset_name = identifier.get_unescaped_names(dataset_name)
    # Case sensitive match
    query = f"show datasets like '{dataset_name}' in schema {db}.{schema} starts with '{dataset_name}'"
    if session.sql(query).count() != 0:
        return
    raise snowml_exceptions.SnowflakeMLException(
        error_code=error_codes.NOT_FOUND,
        original_exception=dataset_errors.DatasetNotExistError(
            dataset_error_messages.DATASET_NOT_EXIST.format(dataset_name)
        ),
    )
|
@@ -0,0 +1,53 @@
|
|
1
|
+
from typing import Any
|
2
|
+
|
3
|
+
from snowflake import snowpark
|
4
|
+
from snowflake.ml._internal import telemetry
|
5
|
+
from snowflake.ml.dataset import dataset
|
6
|
+
|
7
|
+
# Project name passed to the API-usage telemetry decorator on this module's functions.
_PROJECT = "Dataset"
|
8
|
+
|
9
|
+
|
10
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
def create_from_dataframe(
    session: snowpark.Session,
    name: str,
    version: str,
    input_dataframe: snowpark.DataFrame,
    **version_kwargs: Any,
) -> dataset.Dataset:
    """
    Create a Dataset version from a DataFrame and return the Dataset
    with that version selected.

    The Dataset is created with ``exist_ok=True`` before the version is added.

    Args:
        session: The Snowpark Session instance to use.
        name: The dataset name
        version: The dataset version name
        input_dataframe: DataFrame containing data to be saved to the created Dataset.
        version_kwargs: Keyword arguments passed to dataset version creation.
            See `Dataset.create_version()` documentation for supported arguments.

    Returns:
        A Dataset object.
    """
    parent: dataset.Dataset = dataset.Dataset.create(session, name, exist_ok=True)
    parent.create_version(version, input_dataframe=input_dataframe, **version_kwargs)
    # select_version returns a new copy
    selected: dataset.Dataset = parent.select_version(version)
    return selected
|
37
|
+
|
38
|
+
|
39
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
def load_dataset(session: snowpark.Session, name: str, version: str) -> dataset.Dataset:
    """
    Load a Dataset by name and select the requested version.

    Args:
        session: The Snowpark Session instance to use.
        name: The dataset name.
        version: The dataset version name.

    Returns:
        A Dataset object with ``version`` selected.
    """
    loaded = dataset.Dataset.load(session, name)
    selected: dataset.Dataset = loaded.select_version(version)
    return selected
|