snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_sentiment.py +7 -4
- snowflake/ml/_internal/env_utils.py +6 -0
- snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
- snowflake/ml/_internal/telemetry.py +1 -0
- snowflake/ml/_internal/utils/identifier.py +1 -1
- snowflake/ml/_internal/utils/sql_identifier.py +14 -1
- snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
- snowflake/ml/dataset/__init__.py +2 -1
- snowflake/ml/dataset/dataset.py +4 -3
- snowflake/ml/dataset/dataset_reader.py +5 -8
- snowflake/ml/feature_store/__init__.py +6 -0
- snowflake/ml/feature_store/access_manager.py +283 -0
- snowflake/ml/feature_store/feature_store.py +160 -100
- snowflake/ml/feature_store/feature_view.py +30 -19
- snowflake/ml/fileset/embedded_stage_fs.py +15 -12
- snowflake/ml/fileset/snowfs.py +2 -30
- snowflake/ml/fileset/stage_fs.py +25 -7
- snowflake/ml/model/_client/model/model_impl.py +46 -39
- snowflake/ml/model/_client/model/model_version_impl.py +24 -2
- snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
- snowflake/ml/model/_client/ops/model_ops.py +174 -16
- snowflake/ml/model/_client/sql/_base.py +34 -0
- snowflake/ml/model/_client/sql/model.py +32 -39
- snowflake/ml/model/_client/sql/model_version.py +111 -42
- snowflake/ml/model/_client/sql/stage.py +6 -32
- snowflake/ml/model/_client/sql/tag.py +32 -56
- snowflake/ml/model/_model_composer/model_composer.py +8 -4
- snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +90 -142
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +81 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
- snowflake/ml/modeling/cluster/birch.py +8 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
- snowflake/ml/modeling/cluster/dbscan.py +8 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
- snowflake/ml/modeling/cluster/k_means.py +8 -1
- snowflake/ml/modeling/cluster/mean_shift.py +8 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
- snowflake/ml/modeling/cluster/optics.py +8 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
- snowflake/ml/modeling/compose/column_transformer.py +8 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
- snowflake/ml/modeling/covariance/oas.py +8 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
- snowflake/ml/modeling/decomposition/pca.py +8 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
- snowflake/ml/modeling/framework/base.py +4 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
- snowflake/ml/modeling/impute/knn_imputer.py +8 -1
- snowflake/ml/modeling/impute/missing_indicator.py +8 -1
- snowflake/ml/modeling/impute/simple_imputer.py +21 -2
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/lars.py +8 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/perceptron.py +8 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ridge.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
- snowflake/ml/modeling/manifold/isomap.py +8 -1
- snowflake/ml/modeling/manifold/mds.py +8 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
- snowflake/ml/modeling/manifold/tsne.py +8 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
- snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
- snowflake/ml/modeling/pipeline/pipeline.py +27 -7
- snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
- snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
- snowflake/ml/modeling/svm/linear_svc.py +8 -1
- snowflake/ml/modeling/svm/linear_svr.py +8 -1
- snowflake/ml/modeling/svm/nu_svc.py +8 -1
- snowflake/ml/modeling/svm/nu_svr.py +8 -1
- snowflake/ml/modeling/svm/svc.py +8 -1
- snowflake/ml/modeling/svm/svr.py +8 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
- snowflake/ml/registry/_manager/model_manager.py +95 -8
- snowflake/ml/registry/registry.py +10 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +66 -10
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +196 -192
- snowflake/ml/_internal/lineage/dataset_dataframe.py +0 -44
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
snowflake/cortex/_sentiment.py
CHANGED
@@ -11,7 +11,7 @@ from snowflake.ml._internal import telemetry
|
|
11
11
|
)
|
12
12
|
def Sentiment(
|
13
13
|
text: Union[str, snowpark.Column], session: Optional[snowpark.Session] = None
|
14
|
-
) -> Union[
|
14
|
+
) -> Union[float, snowpark.Column]:
|
15
15
|
"""Sentiment calls into the LLM inference service to perform sentiment analysis on the input text.
|
16
16
|
|
17
17
|
Args:
|
@@ -21,11 +21,14 @@ def Sentiment(
|
|
21
21
|
Returns:
|
22
22
|
A float or a column of floats. 1 represents positive sentiment, -1 represents negative sentiment.
|
23
23
|
"""
|
24
|
-
|
25
24
|
return _sentiment_impl("snowflake.cortex.sentiment", text, session=session)
|
26
25
|
|
27
26
|
|
28
27
|
def _sentiment_impl(
|
29
28
|
function: str, text: Union[str, snowpark.Column], session: Optional[snowpark.Session] = None
|
30
|
-
) -> Union[
|
31
|
-
|
29
|
+
) -> Union[float, snowpark.Column]:
|
30
|
+
|
31
|
+
output = call_sql_function(function, session, text)
|
32
|
+
if isinstance(output, snowpark.Column):
|
33
|
+
return output
|
34
|
+
return float(output)
|
@@ -553,6 +553,9 @@ def load_conda_env_file(
|
|
553
553
|
A tuple of Dict of conda dependencies after validated, optional pip requirements if exist
|
554
554
|
and a string 'major.minor.patchlevel' of python version.
|
555
555
|
"""
|
556
|
+
if not path.exists():
|
557
|
+
return collections.defaultdict(list), None, None
|
558
|
+
|
556
559
|
with open(path, encoding="utf-8") as f:
|
557
560
|
env = yaml.safe_load(stream=f)
|
558
561
|
|
@@ -603,6 +606,9 @@ def load_requirements_file(path: pathlib.Path) -> List[requirements.Requirement]
|
|
603
606
|
Returns:
|
604
607
|
List of dependency requirements after validation.
|
605
608
|
"""
|
609
|
+
if not path.exists():
|
610
|
+
return []
|
611
|
+
|
606
612
|
with open(path, encoding="utf-8") as f:
|
607
613
|
reqs = f.readlines()
|
608
614
|
|
@@ -0,0 +1,95 @@
|
|
1
|
+
import copy
|
2
|
+
import functools
|
3
|
+
from typing import Any, Callable, List
|
4
|
+
|
5
|
+
from snowflake import snowpark
|
6
|
+
from snowflake.ml._internal.lineage import data_source
|
7
|
+
|
8
|
+
DATA_SOURCES_ATTR = "_data_sources"
|
9
|
+
|
10
|
+
|
11
|
+
def _get_datasources(*args: Any) -> List[data_source.DataSource]:
|
12
|
+
"""Helper method for extracting data sources attribute from DataFrames in an argument list"""
|
13
|
+
result = []
|
14
|
+
for arg in args:
|
15
|
+
srcs = getattr(arg, DATA_SOURCES_ATTR, None)
|
16
|
+
if isinstance(srcs, list) and all(isinstance(s, data_source.DataSource) for s in srcs):
|
17
|
+
result += srcs
|
18
|
+
return result
|
19
|
+
|
20
|
+
|
21
|
+
def _wrap_func(
|
22
|
+
fn: Callable[..., snowpark.DataFrame], data_sources: List[data_source.DataSource]
|
23
|
+
) -> Callable[..., snowpark.DataFrame]:
|
24
|
+
"""Wrap a DataFrame transform function to propagate data_sources to derived DataFrames."""
|
25
|
+
|
26
|
+
@functools.wraps(fn)
|
27
|
+
def wrapped(*args: Any, **kwargs: Any) -> snowpark.DataFrame:
|
28
|
+
df = fn(*args, **kwargs)
|
29
|
+
patch_dataframe(df, data_sources=data_sources, inplace=True)
|
30
|
+
return df
|
31
|
+
|
32
|
+
return wrapped
|
33
|
+
|
34
|
+
|
35
|
+
def patch_dataframe(
|
36
|
+
df: snowpark.DataFrame, data_sources: List[data_source.DataSource], inplace: bool = False
|
37
|
+
) -> snowpark.DataFrame:
|
38
|
+
"""
|
39
|
+
Monkey patch a DataFrame to attach the provided data_sources as an attribute of the DataFrame.
|
40
|
+
Also patches the DataFrame's transformation functions to propagate the new data sources attribute to
|
41
|
+
derived DataFrames.
|
42
|
+
|
43
|
+
Args:
|
44
|
+
df: DataFrame to be patched
|
45
|
+
data_sources: List of data sources for the DataFrame
|
46
|
+
inplace: If True, patches the DataFrame in-place. If False, creates a shallow copy of the DataFrame.
|
47
|
+
|
48
|
+
Returns:
|
49
|
+
Patched DataFrame
|
50
|
+
"""
|
51
|
+
# Instance-level monkey-patches
|
52
|
+
funcs = [
|
53
|
+
"_with_plan",
|
54
|
+
"_lateral",
|
55
|
+
"group_by",
|
56
|
+
"group_by_grouping_sets",
|
57
|
+
"cube",
|
58
|
+
"pivot",
|
59
|
+
"rollup",
|
60
|
+
"cache_result",
|
61
|
+
"_to_df", # RelationalGroupedDataFrame
|
62
|
+
]
|
63
|
+
if not inplace:
|
64
|
+
df = copy.copy(df)
|
65
|
+
setattr(df, DATA_SOURCES_ATTR, data_sources)
|
66
|
+
for func in funcs:
|
67
|
+
fn = getattr(df, func, None)
|
68
|
+
if fn is not None:
|
69
|
+
setattr(df, func, _wrap_func(fn, data_sources=data_sources))
|
70
|
+
return df
|
71
|
+
|
72
|
+
|
73
|
+
def _wrap_class_func(fn: Callable[..., snowpark.DataFrame]) -> Callable[..., snowpark.DataFrame]:
|
74
|
+
@functools.wraps(fn)
|
75
|
+
def wrapped(*args: Any, **kwargs: Any) -> snowpark.DataFrame:
|
76
|
+
df = fn(*args, **kwargs)
|
77
|
+
data_sources = _get_datasources(*args) + _get_datasources(*kwargs.values())
|
78
|
+
if data_sources:
|
79
|
+
patch_dataframe(df, data_sources, inplace=True)
|
80
|
+
return df
|
81
|
+
|
82
|
+
return wrapped
|
83
|
+
|
84
|
+
|
85
|
+
# Class-level monkey-patches
|
86
|
+
for klass, func_list in {
|
87
|
+
snowpark.DataFrame: [
|
88
|
+
"__copy__",
|
89
|
+
],
|
90
|
+
snowpark.RelationalGroupedDataFrame: [],
|
91
|
+
}.items():
|
92
|
+
assert isinstance(func_list, list) # mypy
|
93
|
+
for func in func_list:
|
94
|
+
fn = getattr(klass, func)
|
95
|
+
setattr(klass, func, _wrap_class_func(fn))
|
@@ -50,6 +50,7 @@ class TelemetryField(enum.Enum):
|
|
50
50
|
# types of telemetry
|
51
51
|
TYPE_FUNCTION_USAGE = "function_usage"
|
52
52
|
TYPE_SNOWML_SPCS_USAGE = "snowml_spcs_usage"
|
53
|
+
TYPE_SNOWML_PIPELINE_USAGE = "snowml_pipeline_usage"
|
53
54
|
# message keys for telemetry
|
54
55
|
KEY_PROJECT = "project"
|
55
56
|
KEY_SUBPROJECT = "subproject"
|
@@ -156,7 +156,7 @@ def parse_schema_level_object_identifier(
|
|
156
156
|
"""
|
157
157
|
res = _SF_SCHEMA_LEVEL_OBJECT_RE.fullmatch(path)
|
158
158
|
if not res:
|
159
|
-
raise ValueError(f"Invalid identifier. It should start with database.schema.
|
159
|
+
raise ValueError(f"Invalid identifier. It should start with database.schema.object. Getting {path}")
|
160
160
|
return (
|
161
161
|
res.group("db"),
|
162
162
|
res.group("schema"),
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import List
|
1
|
+
from typing import List, Optional, Tuple
|
2
2
|
|
3
3
|
from snowflake.ml._internal.utils import identifier
|
4
4
|
|
@@ -79,3 +79,16 @@ class SqlIdentifier(str):
|
|
79
79
|
|
80
80
|
def to_sql_identifiers(list_of_str: List[str], *, case_sensitive: bool = False) -> List[SqlIdentifier]:
|
81
81
|
return [SqlIdentifier(val, case_sensitive=case_sensitive) for val in list_of_str]
|
82
|
+
|
83
|
+
|
84
|
+
def parse_fully_qualified_name(
|
85
|
+
name: str,
|
86
|
+
) -> Tuple[Optional[SqlIdentifier], Optional[SqlIdentifier], SqlIdentifier]:
|
87
|
+
db, schema, object, _ = identifier.parse_schema_level_object_identifier(name)
|
88
|
+
|
89
|
+
assert name is not None, f"Unable parse the input name `{name}` as fully qualified."
|
90
|
+
return (
|
91
|
+
SqlIdentifier(db) if db else None,
|
92
|
+
SqlIdentifier(schema) if schema else None,
|
93
|
+
SqlIdentifier(object),
|
94
|
+
)
|
@@ -8,14 +8,17 @@ from absl.logging import logging
|
|
8
8
|
logger = logging.getLogger(__name__)
|
9
9
|
|
10
10
|
|
11
|
-
def get_temp_file_path() -> str:
|
11
|
+
def get_temp_file_path(prefix: str = "") -> str:
|
12
12
|
"""Returns a new random temp file path.
|
13
13
|
|
14
|
+
Args:
|
15
|
+
prefix: A prefix for the temp file name; this can help identify what the file stores. Defaults to an empty string.
|
16
|
+
|
14
17
|
Returns:
|
15
18
|
A new temp file path.
|
16
19
|
"""
|
17
20
|
# TODO(snandamuri): Use in-memory filesystem for temp files.
|
18
|
-
local_file = tempfile.NamedTemporaryFile(delete=True)
|
21
|
+
local_file = tempfile.NamedTemporaryFile(prefix=prefix, delete=True)
|
19
22
|
local_file_name = local_file.name
|
20
23
|
local_file.close()
|
21
24
|
return local_file_name
|
snowflake/ml/dataset/__init__.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1
|
-
from .dataset import Dataset
|
1
|
+
from .dataset import Dataset, DatasetVersion
|
2
2
|
from .dataset_factory import create_from_dataframe, load_dataset
|
3
3
|
from .dataset_reader import DatasetReader
|
4
4
|
|
5
5
|
__all__ = [
|
6
6
|
"Dataset",
|
7
|
+
"DatasetVersion",
|
7
8
|
"DatasetReader",
|
8
9
|
"create_from_dataframe",
|
9
10
|
"load_dataset",
|
snowflake/ml/dataset/dataset.py
CHANGED
@@ -73,10 +73,11 @@ class DatasetVersion:
|
|
73
73
|
f"SHOW VERSIONS LIKE '{self._version}' IN DATASET {self._parent.fully_qualified_name}",
|
74
74
|
statement_params=_TELEMETRY_STATEMENT_PARAMS,
|
75
75
|
)
|
76
|
-
.
|
76
|
+
.has_column(_DATASET_VERSION_NAME_COL, allow_empty=False)
|
77
77
|
.validate()
|
78
78
|
)
|
79
|
-
|
79
|
+
(match_row,) = (r for r in sql_result if r[_DATASET_VERSION_NAME_COL] == self._version)
|
80
|
+
self._properties = match_row.as_dict(True)
|
80
81
|
return self._properties.get(property_name, default)
|
81
82
|
|
82
83
|
def _get_metadata(self) -> Optional[dataset_metadata.DatasetMetadata]:
|
@@ -283,7 +284,7 @@ class Dataset:
|
|
283
284
|
exclude_cols: Name of column(s) in dataset to be excluded during training/testing (e.g. timestamp).
|
284
285
|
label_cols: Name of column(s) in dataset that contains labels.
|
285
286
|
properties: Custom metadata properties, saved under `DatasetMetadata.properties`
|
286
|
-
partition_by: Optional partitioning scheme within the new Dataset version.
|
287
|
+
partition_by: Optional SQL expression to use as the partitioning scheme within the new Dataset version.
|
287
288
|
comment: A descriptive comment about this dataset.
|
288
289
|
|
289
290
|
Returns:
|
@@ -1,10 +1,11 @@
|
|
1
1
|
from typing import Any, List
|
2
2
|
|
3
3
|
import pandas as pd
|
4
|
+
from pyarrow import parquet as pq
|
4
5
|
|
5
6
|
from snowflake import snowpark
|
6
7
|
from snowflake.ml._internal import telemetry
|
7
|
-
from snowflake.ml._internal.lineage import data_source,
|
8
|
+
from snowflake.ml._internal.lineage import data_source, lineage_utils
|
8
9
|
from snowflake.ml._internal.utils import import_utils
|
9
10
|
from snowflake.ml.fileset import snowfs
|
10
11
|
|
@@ -185,7 +186,7 @@ class DatasetReader:
|
|
185
186
|
combined_df = dfs[0]
|
186
187
|
for df in dfs[1:]:
|
187
188
|
combined_df = combined_df.union_all_by_name(df)
|
188
|
-
return
|
189
|
+
return lineage_utils.patch_dataframe(combined_df, data_sources=self._sources, inplace=True)
|
189
190
|
|
190
191
|
@telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
|
191
192
|
def to_pandas(self) -> pd.DataFrame:
|
@@ -194,9 +195,5 @@ class DatasetReader:
|
|
194
195
|
if not files:
|
195
196
|
return pd.DataFrame() # Return empty DataFrame
|
196
197
|
self._fs.optimize_read(files)
|
197
|
-
|
198
|
-
|
199
|
-
with self._fs.open(file) as fp:
|
200
|
-
pd_dfs.append(pd.read_parquet(fp))
|
201
|
-
pd_df = pd_dfs[0] if len(pd_dfs) == 1 else pd.concat(pd_dfs, ignore_index=True, copy=False)
|
202
|
-
return pd_df
|
198
|
+
pd_ds = pq.ParquetDataset(files, filesystem=self._fs)
|
199
|
+
return pd_ds.read_pandas().to_pandas()
|
@@ -2,8 +2,14 @@ import os
|
|
2
2
|
|
3
3
|
from snowflake.ml._internal import init_utils
|
4
4
|
|
5
|
+
from .access_manager import setup_feature_store
|
6
|
+
|
5
7
|
pkg_dir = os.path.dirname(os.path.abspath(__file__))
|
6
8
|
pkg_name = __name__
|
7
9
|
exportable_classes = init_utils.fetch_classes_from_modules_in_pkg_dir(pkg_dir=pkg_dir, pkg_name=pkg_name)
|
8
10
|
for k, v in exportable_classes.items():
|
9
11
|
globals()[k] = v
|
12
|
+
|
13
|
+
__all__ = list(exportable_classes.keys()) + [
|
14
|
+
"setup_feature_store",
|
15
|
+
]
|
@@ -0,0 +1,283 @@
|
|
1
|
+
from dataclasses import asdict, dataclass
|
2
|
+
from enum import Enum
|
3
|
+
from typing import Dict, List, Optional
|
4
|
+
from warnings import warn
|
5
|
+
|
6
|
+
from snowflake.ml._internal import telemetry
|
7
|
+
from snowflake.ml._internal.utils.query_result_checker import SqlResultValidator
|
8
|
+
from snowflake.ml._internal.utils.sql_identifier import SqlIdentifier
|
9
|
+
from snowflake.ml.feature_store.feature_store import (
|
10
|
+
_FEATURE_STORE_OBJECT_TAG,
|
11
|
+
_FEATURE_VIEW_METADATA_TAG,
|
12
|
+
CreationMode,
|
13
|
+
FeatureStore,
|
14
|
+
)
|
15
|
+
from snowflake.snowpark import Session, exceptions
|
16
|
+
|
17
|
+
_PROJECT = "FeatureStore"
|
18
|
+
_ALL_OBJECTS = "@ALL_OBJECTS" # Special flag to mark "all+future" grants
|
19
|
+
|
20
|
+
|
21
|
+
class _FeatureStoreRole(Enum):
|
22
|
+
NONE = 0 # For testing purposes
|
23
|
+
CONSUMER = 1
|
24
|
+
PRODUCER = 2
|
25
|
+
|
26
|
+
|
27
|
+
@dataclass(frozen=True)
|
28
|
+
class _Privilege:
|
29
|
+
object_type: str
|
30
|
+
object_name: str
|
31
|
+
privileges: List[str]
|
32
|
+
scope: Optional[str] = None
|
33
|
+
|
34
|
+
|
35
|
+
@dataclass(frozen=True)
|
36
|
+
class _SessionInfo:
|
37
|
+
database: SqlIdentifier
|
38
|
+
schema: SqlIdentifier
|
39
|
+
warehouse: SqlIdentifier
|
40
|
+
|
41
|
+
|
42
|
+
# Privileges to grant to each role, as lists of _Privilege(object_type, object_name, privileges, scope) entries
|
43
|
+
_PRE_INIT_PRIVILEGES: Dict[_FeatureStoreRole, List[_Privilege]] = {
|
44
|
+
_FeatureStoreRole.PRODUCER: [
|
45
|
+
_Privilege("DATABASE", "{database}", ["USAGE"]),
|
46
|
+
_Privilege("SCHEMA", "{database}.{schema}", ["USAGE"]),
|
47
|
+
_Privilege(
|
48
|
+
"SCHEMA",
|
49
|
+
"{database}.{schema}",
|
50
|
+
[
|
51
|
+
"CREATE DYNAMIC TABLE",
|
52
|
+
"CREATE TAG",
|
53
|
+
"CREATE VIEW",
|
54
|
+
"CREATE TASK",
|
55
|
+
"CREATE TABLE",
|
56
|
+
],
|
57
|
+
),
|
58
|
+
_Privilege(
|
59
|
+
"SCHEMA",
|
60
|
+
"{database}.{schema}",
|
61
|
+
[
|
62
|
+
"CREATE DATASET", # Handle DATASET privilege separately since it may not be enabled
|
63
|
+
],
|
64
|
+
),
|
65
|
+
_Privilege("DYNAMIC TABLE", _ALL_OBJECTS, ["OPERATE"], "SCHEMA {database}.{schema}"),
|
66
|
+
_Privilege("TASK", _ALL_OBJECTS, ["OPERATE"], "SCHEMA {database}.{schema}"),
|
67
|
+
],
|
68
|
+
_FeatureStoreRole.CONSUMER: [
|
69
|
+
_Privilege("DATABASE", "{database}", ["USAGE"]),
|
70
|
+
_Privilege("SCHEMA", "{database}.{schema}", ["USAGE"]),
|
71
|
+
_Privilege("DYNAMIC TABLE", _ALL_OBJECTS, ["SELECT", "MONITOR"], "SCHEMA {database}.{schema}"),
|
72
|
+
_Privilege("VIEW", _ALL_OBJECTS, ["SELECT", "REFERENCES"], "SCHEMA {database}.{schema}"),
|
73
|
+
_Privilege("TABLE", _ALL_OBJECTS, ["SELECT", "REFERENCES"], "SCHEMA {database}.{schema}"),
|
74
|
+
_Privilege("DATASET", _ALL_OBJECTS, ["USAGE"], "SCHEMA {database}.{schema}"),
|
75
|
+
# User should decide whether they want to grant warehouse usage to CONSUMER
|
76
|
+
# _Privilege("WAREHOUSE", "{warehouse}", ["USAGE"]),
|
77
|
+
],
|
78
|
+
_FeatureStoreRole.NONE: [],
|
79
|
+
}
|
80
|
+
|
81
|
+
_POST_INIT_PRIVILEGES: Dict[_FeatureStoreRole, List[_Privilege]] = {
|
82
|
+
_FeatureStoreRole.PRODUCER: [
|
83
|
+
_Privilege("TAG", f"{{database}}.{{schema}}.{_FEATURE_VIEW_METADATA_TAG}", ["APPLY"]),
|
84
|
+
_Privilege("TAG", f"{{database}}.{{schema}}.{_FEATURE_STORE_OBJECT_TAG}", ["APPLY"]),
|
85
|
+
],
|
86
|
+
_FeatureStoreRole.CONSUMER: [],
|
87
|
+
_FeatureStoreRole.NONE: [],
|
88
|
+
}
|
89
|
+
|
90
|
+
|
91
|
+
def _grant_privileges(
|
92
|
+
session: Session, role_name: str, privileges: List[_Privilege], session_info: _SessionInfo
|
93
|
+
) -> None:
|
94
|
+
session_info_dict = asdict(session_info)
|
95
|
+
for p in privileges:
|
96
|
+
if p.object_name == _ALL_OBJECTS:
|
97
|
+
# Ensure obj is plural
|
98
|
+
obj = p.object_type.upper()
|
99
|
+
if not obj.endswith("S"):
|
100
|
+
obj += "S"
|
101
|
+
grant_objects = [f"{prefix} {obj}" for prefix in ("FUTURE", "ALL")]
|
102
|
+
else:
|
103
|
+
grant_objects = [f"{p.object_type} {p.object_name.format(**session_info_dict)}"]
|
104
|
+
try:
|
105
|
+
for grant_object in grant_objects:
|
106
|
+
query = f"GRANT {','.join(p.privileges)} ON {grant_object}"
|
107
|
+
if p.scope:
|
108
|
+
query += f" IN {p.scope.format(**session_info_dict)}"
|
109
|
+
query += f" TO ROLE {role_name}"
|
110
|
+
session.sql(query).collect()
|
111
|
+
except exceptions.SnowparkSQLException as e:
|
112
|
+
if any(
|
113
|
+
s in e.message
|
114
|
+
for s in (
|
115
|
+
"Ask your account admin",
|
116
|
+
"Object type or Class",
|
117
|
+
p.object_type,
|
118
|
+
)
|
119
|
+
):
|
120
|
+
warn(
|
121
|
+
f"Failed to grant privilege for {p.object_type}: {e.message}",
|
122
|
+
UserWarning,
|
123
|
+
stacklevel=1,
|
124
|
+
)
|
125
|
+
else:
|
126
|
+
raise
|
127
|
+
|
128
|
+
|
129
|
+
def _configure_pre_init_privileges(
|
130
|
+
session: Session,
|
131
|
+
session_info: _SessionInfo,
|
132
|
+
roles_to_create: Dict[_FeatureStoreRole, str],
|
133
|
+
) -> None:
|
134
|
+
"""
|
135
|
+
Configure Feature Store role privileges. Must be run with ACCOUNTADMIN
|
136
|
+
or a role with `MANAGE GRANTS` privilege.
|
137
|
+
|
138
|
+
See https://docs.snowflake.com/en/sql-reference/sql/grant-privilege for more information
|
139
|
+
about privilege grants in Snowflake.
|
140
|
+
|
141
|
+
Args:
|
142
|
+
session: Snowpark Session to interact with Snowflake backend.
|
143
|
+
session_info: Session info like database and schema for the FeatureStore instance.
|
144
|
+
roles_to_create: Producer and optional consumer roles to create.
|
145
|
+
"""
|
146
|
+
|
147
|
+
# Create schema if not already exists
|
148
|
+
(create_rst,) = (
|
149
|
+
SqlResultValidator(
|
150
|
+
session,
|
151
|
+
f"CREATE SCHEMA IF NOT EXISTS {session_info.database}.{session_info.schema}",
|
152
|
+
)
|
153
|
+
.has_dimensions(expected_rows=1)
|
154
|
+
.has_column("status")
|
155
|
+
.validate()
|
156
|
+
)
|
157
|
+
schema_created = create_rst["status"].endswith("successfully created.")
|
158
|
+
|
159
|
+
# Pass schema ownership from admin to PRODUCER
|
160
|
+
if schema_created:
|
161
|
+
# TODO: we are missing a test case for this code path
|
162
|
+
session.sql(
|
163
|
+
f"GRANT OWNERSHIP ON SCHEMA {session_info.database}.{session_info.schema} "
|
164
|
+
f"TO ROLE {roles_to_create[_FeatureStoreRole.PRODUCER]}"
|
165
|
+
).collect()
|
166
|
+
|
167
|
+
# Grant privileges to roles
|
168
|
+
for role_type, role in roles_to_create.items():
|
169
|
+
_grant_privileges(session, role, _PRE_INIT_PRIVILEGES[role_type], session_info)
|
170
|
+
|
171
|
+
|
172
|
+
def _configure_post_init_privileges(
|
173
|
+
session: Session,
|
174
|
+
session_info: _SessionInfo,
|
175
|
+
roles_to_create: Dict[_FeatureStoreRole, str],
|
176
|
+
) -> None:
|
177
|
+
for role_type, role in roles_to_create.items():
|
178
|
+
_grant_privileges(session, role, _POST_INIT_PRIVILEGES[role_type], session_info)
|
179
|
+
|
180
|
+
|
181
|
+
def _configure_role_hierarchy(
|
182
|
+
session: Session,
|
183
|
+
producer_role: str,
|
184
|
+
consumer_role: Optional[str],
|
185
|
+
) -> None:
|
186
|
+
"""
|
187
|
+
Create Feature Store roles and configure the role hierarchy. Must be run with
|
188
|
+
ACCOUNTADMIN or a role with `CREATE ROLE` privilege.
|
189
|
+
|
190
|
+
See https://docs.snowflake.com/en/sql-reference/sql/grant-privilege for more information
|
191
|
+
about privilege grants in Snowflake.
|
192
|
+
|
193
|
+
Args:
|
194
|
+
session: Snowpark Session to interact with Snowflake backend.
|
195
|
+
producer_role: Name of producer role to be configured.
|
196
|
+
consumer_role: Name of consumer role to be configured.
|
197
|
+
"""
|
198
|
+
# Create the necessary roles and build role hierarchy
|
199
|
+
producer_role = SqlIdentifier(producer_role)
|
200
|
+
session.sql(f"CREATE ROLE IF NOT EXISTS {producer_role}").collect()
|
201
|
+
session.sql(f"GRANT ROLE {producer_role} TO ROLE SYSADMIN").collect()
|
202
|
+
session.sql(f"GRANT ROLE {producer_role} TO ROLE {session.get_current_role()}").collect()
|
203
|
+
|
204
|
+
if consumer_role is not None:
|
205
|
+
consumer_role = SqlIdentifier(consumer_role)
|
206
|
+
session.sql(f"CREATE ROLE IF NOT EXISTS {consumer_role}").collect()
|
207
|
+
session.sql(f"GRANT ROLE {consumer_role} TO ROLE {producer_role}").collect()
|
208
|
+
|
209
|
+
|
210
|
+
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
211
|
+
def setup_feature_store(
|
212
|
+
session: Session,
|
213
|
+
database: str,
|
214
|
+
schema: str,
|
215
|
+
warehouse: str,
|
216
|
+
producer_role: str = "FS_PRODUCER",
|
217
|
+
consumer_role: Optional[str] = None,
|
218
|
+
) -> FeatureStore:
|
219
|
+
"""
|
220
|
+
Sets up a new Feature Store including role/privilege setup. Must be run with ACCOUNTADMIN
|
221
|
+
or a role with `MANAGE GRANTS` and `CREATE ROLE` privileges.
|
222
|
+
|
223
|
+
See https://docs.snowflake.com/en/sql-reference/sql/grant-privilege for more information
|
224
|
+
about privilege grants in Snowflake.
|
225
|
+
|
226
|
+
Args:
|
227
|
+
session: Snowpark Session to interact with Snowflake backend.
|
228
|
+
database: Database to create the FeatureStore instance.
|
229
|
+
schema: Schema to create the FeatureStore instance.
|
230
|
+
warehouse: Default warehouse for Feature Store compute.
|
231
|
+
producer_role: Name of producer role to be configured.
|
232
|
+
consumer_role: Name of consumer role to be configured. If not specified, consumer role won't be created.
|
233
|
+
|
234
|
+
Returns:
|
235
|
+
Feature Store instance.
|
236
|
+
|
237
|
+
Raises:
|
238
|
+
exceptions.SnowparkSQLException: Insufficient privileges.
|
239
|
+
"""
|
240
|
+
|
241
|
+
database = SqlIdentifier(database)
|
242
|
+
schema = SqlIdentifier(schema)
|
243
|
+
warehouse = SqlIdentifier(warehouse)
|
244
|
+
session_info = _SessionInfo(
|
245
|
+
SqlIdentifier(database),
|
246
|
+
SqlIdentifier(schema),
|
247
|
+
SqlIdentifier(warehouse),
|
248
|
+
)
|
249
|
+
|
250
|
+
try:
|
251
|
+
roles_to_create = {_FeatureStoreRole.PRODUCER: producer_role}
|
252
|
+
if consumer_role is not None:
|
253
|
+
roles_to_create.update({_FeatureStoreRole.CONSUMER: consumer_role})
|
254
|
+
_configure_role_hierarchy(session, producer_role=producer_role, consumer_role=consumer_role)
|
255
|
+
except exceptions.SnowparkSQLException:
|
256
|
+
# Error can be safely ignored if roles already exist and hierarchy is already built
|
257
|
+
for _, role in roles_to_create.items():
|
258
|
+
# Ensure roles already exist
|
259
|
+
if session.sql(f"SHOW ROLES LIKE '{role}' STARTS WITH '{role}'").count() == 0:
|
260
|
+
raise
|
261
|
+
|
262
|
+
if consumer_role is not None:
|
263
|
+
# Ensure hierarchy already configured
|
264
|
+
consumer_grants = session.sql(f"SHOW GRANTS ON ROLE {consumer_role}").collect()
|
265
|
+
if not any(r["granted_to"] == "ROLE" and r["grantee_name"] == producer_role for r in consumer_grants):
|
266
|
+
raise
|
267
|
+
|
268
|
+
# Do any pre-FeatureStore.__init__() privilege setup
|
269
|
+
_configure_pre_init_privileges(session, session_info, roles_to_create)
|
270
|
+
|
271
|
+
# Use PRODUCER role to create and operate new Feature Store
|
272
|
+
current_role = session.get_current_role()
|
273
|
+
assert current_role is not None # to make mypy happy
|
274
|
+
try:
|
275
|
+
session.use_role(producer_role)
|
276
|
+
fs = FeatureStore(session, database, schema, warehouse, creation_mode=CreationMode.CREATE_IF_NOT_EXIST)
|
277
|
+
finally:
|
278
|
+
session.use_role(current_role)
|
279
|
+
|
280
|
+
# Do any post-FeatureStore.__init__() privilege setup
|
281
|
+
_configure_post_init_privileges(session, session_info, roles_to_create)
|
282
|
+
|
283
|
+
return fs
|