snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. snowflake/cortex/_sentiment.py +7 -4
  2. snowflake/ml/_internal/env_utils.py +6 -0
  3. snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
  4. snowflake/ml/_internal/telemetry.py +1 -0
  5. snowflake/ml/_internal/utils/identifier.py +1 -1
  6. snowflake/ml/_internal/utils/sql_identifier.py +14 -1
  7. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  8. snowflake/ml/dataset/__init__.py +2 -1
  9. snowflake/ml/dataset/dataset.py +4 -3
  10. snowflake/ml/dataset/dataset_reader.py +5 -8
  11. snowflake/ml/feature_store/__init__.py +6 -0
  12. snowflake/ml/feature_store/access_manager.py +283 -0
  13. snowflake/ml/feature_store/feature_store.py +160 -100
  14. snowflake/ml/feature_store/feature_view.py +30 -19
  15. snowflake/ml/fileset/embedded_stage_fs.py +15 -12
  16. snowflake/ml/fileset/snowfs.py +2 -30
  17. snowflake/ml/fileset/stage_fs.py +25 -7
  18. snowflake/ml/model/_client/model/model_impl.py +46 -39
  19. snowflake/ml/model/_client/model/model_version_impl.py +24 -2
  20. snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
  21. snowflake/ml/model/_client/ops/model_ops.py +174 -16
  22. snowflake/ml/model/_client/sql/_base.py +34 -0
  23. snowflake/ml/model/_client/sql/model.py +32 -39
  24. snowflake/ml/model/_client/sql/model_version.py +111 -42
  25. snowflake/ml/model/_client/sql/stage.py +6 -32
  26. snowflake/ml/model/_client/sql/tag.py +32 -56
  27. snowflake/ml/model/_model_composer/model_composer.py +8 -4
  28. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
  29. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  30. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  31. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +90 -142
  32. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
  33. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +81 -3
  34. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
  35. snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
  36. snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
  37. snowflake/ml/modeling/cluster/birch.py +8 -1
  38. snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
  39. snowflake/ml/modeling/cluster/dbscan.py +8 -1
  40. snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
  41. snowflake/ml/modeling/cluster/k_means.py +8 -1
  42. snowflake/ml/modeling/cluster/mean_shift.py +8 -1
  43. snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
  44. snowflake/ml/modeling/cluster/optics.py +8 -1
  45. snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
  46. snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
  47. snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
  48. snowflake/ml/modeling/compose/column_transformer.py +8 -1
  49. snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
  50. snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
  51. snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
  52. snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
  53. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
  54. snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
  55. snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
  56. snowflake/ml/modeling/covariance/oas.py +8 -1
  57. snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
  58. snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
  59. snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
  60. snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
  61. snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
  62. snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
  63. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
  64. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
  65. snowflake/ml/modeling/decomposition/pca.py +8 -1
  66. snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
  67. snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
  68. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
  69. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
  70. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
  71. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
  72. snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
  73. snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
  74. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
  75. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
  76. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
  77. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
  79. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
  80. snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
  81. snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
  82. snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
  83. snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
  84. snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
  85. snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
  86. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
  87. snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
  88. snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
  89. snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
  90. snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
  91. snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
  92. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
  93. snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
  94. snowflake/ml/modeling/framework/base.py +4 -3
  95. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
  96. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
  97. snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
  98. snowflake/ml/modeling/impute/knn_imputer.py +8 -1
  99. snowflake/ml/modeling/impute/missing_indicator.py +8 -1
  100. snowflake/ml/modeling/impute/simple_imputer.py +21 -2
  101. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
  102. snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
  103. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
  104. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
  105. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
  106. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
  107. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
  108. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
  109. snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
  110. snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
  111. snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
  112. snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
  113. snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
  114. snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
  115. snowflake/ml/modeling/linear_model/lars.py +8 -1
  116. snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
  117. snowflake/ml/modeling/linear_model/lasso.py +8 -1
  118. snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
  119. snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
  120. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
  121. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
  122. snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
  123. snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
  124. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
  125. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
  126. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
  127. snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
  128. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
  129. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
  130. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
  131. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
  132. snowflake/ml/modeling/linear_model/perceptron.py +8 -1
  133. snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
  134. snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
  135. snowflake/ml/modeling/linear_model/ridge.py +8 -1
  136. snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
  137. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
  138. snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
  139. snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
  140. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
  141. snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
  142. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
  143. snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
  144. snowflake/ml/modeling/manifold/isomap.py +8 -1
  145. snowflake/ml/modeling/manifold/mds.py +8 -1
  146. snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
  147. snowflake/ml/modeling/manifold/tsne.py +8 -1
  148. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
  149. snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
  150. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
  151. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
  152. snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
  153. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
  154. snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
  155. snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
  156. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
  157. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
  158. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
  159. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
  160. snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
  161. snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
  162. snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
  163. snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
  164. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
  165. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
  166. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
  167. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
  168. snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
  169. snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
  170. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  171. snowflake/ml/modeling/pipeline/pipeline.py +27 -7
  172. snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
  173. snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
  174. snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
  175. snowflake/ml/modeling/svm/linear_svc.py +8 -1
  176. snowflake/ml/modeling/svm/linear_svr.py +8 -1
  177. snowflake/ml/modeling/svm/nu_svc.py +8 -1
  178. snowflake/ml/modeling/svm/nu_svr.py +8 -1
  179. snowflake/ml/modeling/svm/svc.py +8 -1
  180. snowflake/ml/modeling/svm/svr.py +8 -1
  181. snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
  182. snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
  183. snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
  184. snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
  185. snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
  186. snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
  187. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
  188. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
  189. snowflake/ml/registry/_manager/model_manager.py +95 -8
  190. snowflake/ml/registry/registry.py +10 -1
  191. snowflake/ml/version.py +1 -1
  192. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +66 -10
  193. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +196 -192
  194. snowflake/ml/_internal/lineage/dataset_dataframe.py +0 -44
  195. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
  196. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
  197. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -11,7 +11,7 @@ from snowflake.ml._internal import telemetry
11
11
  )
12
12
  def Sentiment(
13
13
  text: Union[str, snowpark.Column], session: Optional[snowpark.Session] = None
14
- ) -> Union[str, snowpark.Column]:
14
+ ) -> Union[float, snowpark.Column]:
15
15
  """Sentiment calls into the LLM inference service to perform sentiment analysis on the input text.
16
16
 
17
17
  Args:
@@ -21,11 +21,14 @@ def Sentiment(
21
21
  Returns:
22
22
  A column of floats. 1 represents positive sentiment, -1 represents negative sentiment.
23
23
  """
24
-
25
24
  return _sentiment_impl("snowflake.cortex.sentiment", text, session=session)
26
25
 
27
26
 
28
27
  def _sentiment_impl(
29
28
  function: str, text: Union[str, snowpark.Column], session: Optional[snowpark.Session] = None
30
- ) -> Union[str, snowpark.Column]:
31
- return call_sql_function(function, session, text)
29
+ ) -> Union[float, snowpark.Column]:
30
+
31
+ output = call_sql_function(function, session, text)
32
+ if isinstance(output, snowpark.Column):
33
+ return output
34
+ return float(output)
@@ -553,6 +553,9 @@ def load_conda_env_file(
553
553
  A tuple of Dict of conda dependencies after validated, optional pip requirements if exist
554
554
  and a string 'major.minor.patchlevel' of python version.
555
555
  """
556
+ if not path.exists():
557
+ return collections.defaultdict(list), None, None
558
+
556
559
  with open(path, encoding="utf-8") as f:
557
560
  env = yaml.safe_load(stream=f)
558
561
 
@@ -603,6 +606,9 @@ def load_requirements_file(path: pathlib.Path) -> List[requirements.Requirement]
603
606
  Returns:
604
607
  List of dependencies string after validated.
605
608
  """
609
+ if not path.exists():
610
+ return []
611
+
606
612
  with open(path, encoding="utf-8") as f:
607
613
  reqs = f.readlines()
608
614
 
@@ -0,0 +1,95 @@
1
+ import copy
2
+ import functools
3
+ from typing import Any, Callable, List
4
+
5
+ from snowflake import snowpark
6
+ from snowflake.ml._internal.lineage import data_source
7
+
8
+ DATA_SOURCES_ATTR = "_data_sources"
9
+
10
+
11
+ def _get_datasources(*args: Any) -> List[data_source.DataSource]:
12
+ """Helper method for extracting data sources attribute from DataFrames in an argument list"""
13
+ result = []
14
+ for arg in args:
15
+ srcs = getattr(arg, DATA_SOURCES_ATTR, None)
16
+ if isinstance(srcs, list) and all(isinstance(s, data_source.DataSource) for s in srcs):
17
+ result += srcs
18
+ return result
19
+
20
+
21
+ def _wrap_func(
22
+ fn: Callable[..., snowpark.DataFrame], data_sources: List[data_source.DataSource]
23
+ ) -> Callable[..., snowpark.DataFrame]:
24
+ """Wrap a DataFrame transform function to propagate data_sources to derived DataFrames."""
25
+
26
+ @functools.wraps(fn)
27
+ def wrapped(*args: Any, **kwargs: Any) -> snowpark.DataFrame:
28
+ df = fn(*args, **kwargs)
29
+ patch_dataframe(df, data_sources=data_sources, inplace=True)
30
+ return df
31
+
32
+ return wrapped
33
+
34
+
35
+ def patch_dataframe(
36
+ df: snowpark.DataFrame, data_sources: List[data_source.DataSource], inplace: bool = False
37
+ ) -> snowpark.DataFrame:
38
+ """
39
+ Monkey patch a DataFrame to attach the provided data_sources as an attribute of the DataFrame.
40
+ Also patches the DataFrame's transformation functions to propagate the new data sources attribute to
41
+ derived DataFrames.
42
+
43
+ Args:
44
+ df: DataFrame to be patched
45
+ data_sources: List of data sources for the DataFrame
46
+ inplace: If True, patches the DataFrame in-place. If False, creates a shallow copy of the DataFrame.
47
+
48
+ Returns:
49
+ Patched DataFrame
50
+ """
51
+ # Instance-level monkey-patches
52
+ funcs = [
53
+ "_with_plan",
54
+ "_lateral",
55
+ "group_by",
56
+ "group_by_grouping_sets",
57
+ "cube",
58
+ "pivot",
59
+ "rollup",
60
+ "cache_result",
61
+ "_to_df", # RelationalGroupedDataFrame
62
+ ]
63
+ if not inplace:
64
+ df = copy.copy(df)
65
+ setattr(df, DATA_SOURCES_ATTR, data_sources)
66
+ for func in funcs:
67
+ fn = getattr(df, func, None)
68
+ if fn is not None:
69
+ setattr(df, func, _wrap_func(fn, data_sources=data_sources))
70
+ return df
71
+
72
+
73
+ def _wrap_class_func(fn: Callable[..., snowpark.DataFrame]) -> Callable[..., snowpark.DataFrame]:
74
+ @functools.wraps(fn)
75
+ def wrapped(*args: Any, **kwargs: Any) -> snowpark.DataFrame:
76
+ df = fn(*args, **kwargs)
77
+ data_sources = _get_datasources(*args) + _get_datasources(*kwargs.values())
78
+ if data_sources:
79
+ patch_dataframe(df, data_sources, inplace=True)
80
+ return df
81
+
82
+ return wrapped
83
+
84
+
85
+ # Class-level monkey-patches
86
+ for klass, func_list in {
87
+ snowpark.DataFrame: [
88
+ "__copy__",
89
+ ],
90
+ snowpark.RelationalGroupedDataFrame: [],
91
+ }.items():
92
+ assert isinstance(func_list, list) # mypy
93
+ for func in func_list:
94
+ fn = getattr(klass, func)
95
+ setattr(klass, func, _wrap_class_func(fn))
@@ -50,6 +50,7 @@ class TelemetryField(enum.Enum):
50
50
  # types of telemetry
51
51
  TYPE_FUNCTION_USAGE = "function_usage"
52
52
  TYPE_SNOWML_SPCS_USAGE = "snowml_spcs_usage"
53
+ TYPE_SNOWML_PIPELINE_USAGE = "snowml_pipeline_usage"
53
54
  # message keys for telemetry
54
55
  KEY_PROJECT = "project"
55
56
  KEY_SUBPROJECT = "subproject"
@@ -156,7 +156,7 @@ def parse_schema_level_object_identifier(
156
156
  """
157
157
  res = _SF_SCHEMA_LEVEL_OBJECT_RE.fullmatch(path)
158
158
  if not res:
159
- raise ValueError(f"Invalid identifier. It should start with database.schema.stage. Getting {path}")
159
+ raise ValueError(f"Invalid identifier. It should start with database.schema.object. Getting {path}")
160
160
  return (
161
161
  res.group("db"),
162
162
  res.group("schema"),
@@ -1,4 +1,4 @@
1
- from typing import List
1
+ from typing import List, Optional, Tuple
2
2
 
3
3
  from snowflake.ml._internal.utils import identifier
4
4
 
@@ -79,3 +79,16 @@ class SqlIdentifier(str):
79
79
 
80
80
  def to_sql_identifiers(list_of_str: List[str], *, case_sensitive: bool = False) -> List[SqlIdentifier]:
81
81
  return [SqlIdentifier(val, case_sensitive=case_sensitive) for val in list_of_str]
82
+
83
+
84
+ def parse_fully_qualified_name(
85
+ name: str,
86
+ ) -> Tuple[Optional[SqlIdentifier], Optional[SqlIdentifier], SqlIdentifier]:
87
+ db, schema, object, _ = identifier.parse_schema_level_object_identifier(name)
88
+
89
+ assert name is not None, f"Unable parse the input name `{name}` as fully qualified."
90
+ return (
91
+ SqlIdentifier(db) if db else None,
92
+ SqlIdentifier(schema) if schema else None,
93
+ SqlIdentifier(object),
94
+ )
@@ -8,14 +8,17 @@ from absl.logging import logging
8
8
  logger = logging.getLogger(__name__)
9
9
 
10
10
 
11
- def get_temp_file_path() -> str:
11
+ def get_temp_file_path(prefix: str = "") -> str:
12
12
  """Returns a new random temp file path.
13
13
 
14
+ Args:
15
+ prefix: A prefix to the temp file path; this can help add stored file information. Defaults to an empty string.
16
+
14
17
  Returns:
15
18
  A new temp file path.
16
19
  """
17
20
  # TODO(snandamuri): Use in-memory filesystem for temp files.
18
- local_file = tempfile.NamedTemporaryFile(delete=True)
21
+ local_file = tempfile.NamedTemporaryFile(prefix=prefix, delete=True)
19
22
  local_file_name = local_file.name
20
23
  local_file.close()
21
24
  return local_file_name
@@ -1,9 +1,10 @@
1
- from .dataset import Dataset
1
+ from .dataset import Dataset, DatasetVersion
2
2
  from .dataset_factory import create_from_dataframe, load_dataset
3
3
  from .dataset_reader import DatasetReader
4
4
 
5
5
  __all__ = [
6
6
  "Dataset",
7
+ "DatasetVersion",
7
8
  "DatasetReader",
8
9
  "create_from_dataframe",
9
10
  "load_dataset",
@@ -73,10 +73,11 @@ class DatasetVersion:
73
73
  f"SHOW VERSIONS LIKE '{self._version}' IN DATASET {self._parent.fully_qualified_name}",
74
74
  statement_params=_TELEMETRY_STATEMENT_PARAMS,
75
75
  )
76
- .has_dimensions(expected_rows=1)
76
+ .has_column(_DATASET_VERSION_NAME_COL, allow_empty=False)
77
77
  .validate()
78
78
  )
79
- self._properties = sql_result[0].as_dict(True)
79
+ (match_row,) = (r for r in sql_result if r[_DATASET_VERSION_NAME_COL] == self._version)
80
+ self._properties = match_row.as_dict(True)
80
81
  return self._properties.get(property_name, default)
81
82
 
82
83
  def _get_metadata(self) -> Optional[dataset_metadata.DatasetMetadata]:
@@ -283,7 +284,7 @@ class Dataset:
283
284
  exclude_cols: Name of column(s) in dataset to be excluded during training/testing (e.g. timestamp).
284
285
  label_cols: Name of column(s) in dataset that contains labels.
285
286
  properties: Custom metadata properties, saved under `DatasetMetadata.properties`
286
- partition_by: Optional partitioning scheme within the new Dataset version.
287
+ partition_by: Optional SQL expression to use as the partitioning scheme within the new Dataset version.
287
288
  comment: A descriptive comment about this dataset.
288
289
 
289
290
  Returns:
@@ -1,10 +1,11 @@
1
1
  from typing import Any, List
2
2
 
3
3
  import pandas as pd
4
+ from pyarrow import parquet as pq
4
5
 
5
6
  from snowflake import snowpark
6
7
  from snowflake.ml._internal import telemetry
7
- from snowflake.ml._internal.lineage import data_source, dataset_dataframe
8
+ from snowflake.ml._internal.lineage import data_source, lineage_utils
8
9
  from snowflake.ml._internal.utils import import_utils
9
10
  from snowflake.ml.fileset import snowfs
10
11
 
@@ -185,7 +186,7 @@ class DatasetReader:
185
186
  combined_df = dfs[0]
186
187
  for df in dfs[1:]:
187
188
  combined_df = combined_df.union_all_by_name(df)
188
- return dataset_dataframe.DatasetDataFrame.from_dataframe(combined_df, data_sources=self._sources, inplace=True)
189
+ return lineage_utils.patch_dataframe(combined_df, data_sources=self._sources, inplace=True)
189
190
 
190
191
  @telemetry.send_api_usage_telemetry(project=_PROJECT, subproject=_SUBPROJECT)
191
192
  def to_pandas(self) -> pd.DataFrame:
@@ -194,9 +195,5 @@ class DatasetReader:
194
195
  if not files:
195
196
  return pd.DataFrame() # Return empty DataFrame
196
197
  self._fs.optimize_read(files)
197
- pd_dfs = []
198
- for file in files:
199
- with self._fs.open(file) as fp:
200
- pd_dfs.append(pd.read_parquet(fp))
201
- pd_df = pd_dfs[0] if len(pd_dfs) == 1 else pd.concat(pd_dfs, ignore_index=True, copy=False)
202
- return pd_df
198
+ pd_ds = pq.ParquetDataset(files, filesystem=self._fs)
199
+ return pd_ds.read_pandas().to_pandas()
@@ -2,8 +2,14 @@ import os
2
2
 
3
3
  from snowflake.ml._internal import init_utils
4
4
 
5
+ from .access_manager import setup_feature_store
6
+
5
7
  pkg_dir = os.path.dirname(os.path.abspath(__file__))
6
8
  pkg_name = __name__
7
9
  exportable_classes = init_utils.fetch_classes_from_modules_in_pkg_dir(pkg_dir=pkg_dir, pkg_name=pkg_name)
8
10
  for k, v in exportable_classes.items():
9
11
  globals()[k] = v
12
+
13
+ __all__ = list(exportable_classes.keys()) + [
14
+ "setup_feature_store",
15
+ ]
@@ -0,0 +1,283 @@
1
+ from dataclasses import asdict, dataclass
2
+ from enum import Enum
3
+ from typing import Dict, List, Optional
4
+ from warnings import warn
5
+
6
+ from snowflake.ml._internal import telemetry
7
+ from snowflake.ml._internal.utils.query_result_checker import SqlResultValidator
8
+ from snowflake.ml._internal.utils.sql_identifier import SqlIdentifier
9
+ from snowflake.ml.feature_store.feature_store import (
10
+ _FEATURE_STORE_OBJECT_TAG,
11
+ _FEATURE_VIEW_METADATA_TAG,
12
+ CreationMode,
13
+ FeatureStore,
14
+ )
15
+ from snowflake.snowpark import Session, exceptions
16
+
17
+ _PROJECT = "FeatureStore"
18
+ _ALL_OBJECTS = "@ALL_OBJECTS" # Special flag to mark "all+future" grants
19
+
20
+
21
+ class _FeatureStoreRole(Enum):
22
+ NONE = 0 # For testing purposes
23
+ CONSUMER = 1
24
+ PRODUCER = 2
25
+
26
+
27
+ @dataclass(frozen=True)
28
+ class _Privilege:
29
+ object_type: str
30
+ object_name: str
31
+ privileges: List[str]
32
+ scope: Optional[str] = None
33
+
34
+
35
+ @dataclass(frozen=True)
36
+ class _SessionInfo:
37
+ database: SqlIdentifier
38
+ schema: SqlIdentifier
39
+ warehouse: SqlIdentifier
40
+
41
+
42
+ # Lists of privileges per role, as _Privilege(OBJECT_TYPE, OBJECT_NAME, [PRIVILEGES, ...], optional SCOPE)
43
+ _PRE_INIT_PRIVILEGES: Dict[_FeatureStoreRole, List[_Privilege]] = {
44
+ _FeatureStoreRole.PRODUCER: [
45
+ _Privilege("DATABASE", "{database}", ["USAGE"]),
46
+ _Privilege("SCHEMA", "{database}.{schema}", ["USAGE"]),
47
+ _Privilege(
48
+ "SCHEMA",
49
+ "{database}.{schema}",
50
+ [
51
+ "CREATE DYNAMIC TABLE",
52
+ "CREATE TAG",
53
+ "CREATE VIEW",
54
+ "CREATE TASK",
55
+ "CREATE TABLE",
56
+ ],
57
+ ),
58
+ _Privilege(
59
+ "SCHEMA",
60
+ "{database}.{schema}",
61
+ [
62
+ "CREATE DATASET", # Handle DATASET privilege separately since it may not be enabled
63
+ ],
64
+ ),
65
+ _Privilege("DYNAMIC TABLE", _ALL_OBJECTS, ["OPERATE"], "SCHEMA {database}.{schema}"),
66
+ _Privilege("TASK", _ALL_OBJECTS, ["OPERATE"], "SCHEMA {database}.{schema}"),
67
+ ],
68
+ _FeatureStoreRole.CONSUMER: [
69
+ _Privilege("DATABASE", "{database}", ["USAGE"]),
70
+ _Privilege("SCHEMA", "{database}.{schema}", ["USAGE"]),
71
+ _Privilege("DYNAMIC TABLE", _ALL_OBJECTS, ["SELECT", "MONITOR"], "SCHEMA {database}.{schema}"),
72
+ _Privilege("VIEW", _ALL_OBJECTS, ["SELECT", "REFERENCES"], "SCHEMA {database}.{schema}"),
73
+ _Privilege("TABLE", _ALL_OBJECTS, ["SELECT", "REFERENCES"], "SCHEMA {database}.{schema}"),
74
+ _Privilege("DATASET", _ALL_OBJECTS, ["USAGE"], "SCHEMA {database}.{schema}"),
75
+ # User should decide whether they want to grant warehouse usage to CONSUMER
76
+ # _Privilege("WAREHOUSE", "{warehouse}", ["USAGE"]),
77
+ ],
78
+ _FeatureStoreRole.NONE: [],
79
+ }
80
+
81
+ _POST_INIT_PRIVILEGES: Dict[_FeatureStoreRole, List[_Privilege]] = {
82
+ _FeatureStoreRole.PRODUCER: [
83
+ _Privilege("TAG", f"{{database}}.{{schema}}.{_FEATURE_VIEW_METADATA_TAG}", ["APPLY"]),
84
+ _Privilege("TAG", f"{{database}}.{{schema}}.{_FEATURE_STORE_OBJECT_TAG}", ["APPLY"]),
85
+ ],
86
+ _FeatureStoreRole.CONSUMER: [],
87
+ _FeatureStoreRole.NONE: [],
88
+ }
89
+
90
+
91
+ def _grant_privileges(
92
+ session: Session, role_name: str, privileges: List[_Privilege], session_info: _SessionInfo
93
+ ) -> None:
94
+ session_info_dict = asdict(session_info)
95
+ for p in privileges:
96
+ if p.object_name == _ALL_OBJECTS:
97
+ # Ensure obj is plural
98
+ obj = p.object_type.upper()
99
+ if not obj.endswith("S"):
100
+ obj += "S"
101
+ grant_objects = [f"{prefix} {obj}" for prefix in ("FUTURE", "ALL")]
102
+ else:
103
+ grant_objects = [f"{p.object_type} {p.object_name.format(**session_info_dict)}"]
104
+ try:
105
+ for grant_object in grant_objects:
106
+ query = f"GRANT {','.join(p.privileges)} ON {grant_object}"
107
+ if p.scope:
108
+ query += f" IN {p.scope.format(**session_info_dict)}"
109
+ query += f" TO ROLE {role_name}"
110
+ session.sql(query).collect()
111
+ except exceptions.SnowparkSQLException as e:
112
+ if any(
113
+ s in e.message
114
+ for s in (
115
+ "Ask your account admin",
116
+ "Object type or Class",
117
+ p.object_type,
118
+ )
119
+ ):
120
+ warn(
121
+ f"Failed to grant privilege for {p.object_type}: {e.message}",
122
+ UserWarning,
123
+ stacklevel=1,
124
+ )
125
+ else:
126
+ raise
127
+
128
+
129
+ def _configure_pre_init_privileges(
130
+ session: Session,
131
+ session_info: _SessionInfo,
132
+ roles_to_create: Dict[_FeatureStoreRole, str],
133
+ ) -> None:
134
+ """
135
+ Configure Feature Store role privileges. Must be run with ACCOUNTADMIN
136
+ or a role with `MANAGE GRANTS` privilege.
137
+
138
+ See https://docs.snowflake.com/en/sql-reference/sql/grant-privilege for more information
139
+ about privilege grants in Snowflake.
140
+
141
+ Args:
142
+ session: Snowpark Session to interact with Snowflake backend.
143
+ session_info: Session info like database and schema for the FeatureStore instance.
144
+ roles_to_create: Producer and optional consumer roles to create.
145
+ """
146
+
147
+ # Create schema if not already exists
148
+ (create_rst,) = (
149
+ SqlResultValidator(
150
+ session,
151
+ f"CREATE SCHEMA IF NOT EXISTS {session_info.database}.{session_info.schema}",
152
+ )
153
+ .has_dimensions(expected_rows=1)
154
+ .has_column("status")
155
+ .validate()
156
+ )
157
+ schema_created = create_rst["status"].endswith("successfully created.")
158
+
159
+ # Pass schema ownership from admin to PRODUCER
160
+ if schema_created:
161
+ # TODO: we are missing a test case for this code path
162
+ session.sql(
163
+ f"GRANT OWNERSHIP ON SCHEMA {session_info.database}.{session_info.schema} "
164
+ f"TO ROLE {roles_to_create[_FeatureStoreRole.PRODUCER]}"
165
+ ).collect()
166
+
167
+ # Grant privileges to roles
168
+ for role_type, role in roles_to_create.items():
169
+ _grant_privileges(session, role, _PRE_INIT_PRIVILEGES[role_type], session_info)
170
+
171
+
172
+ def _configure_post_init_privileges(
173
+ session: Session,
174
+ session_info: _SessionInfo,
175
+ roles_to_create: Dict[_FeatureStoreRole, str],
176
+ ) -> None:
177
+ for role_type, role in roles_to_create.items():
178
+ _grant_privileges(session, role, _POST_INIT_PRIVILEGES[role_type], session_info)
179
+
180
+
181
+ def _configure_role_hierarchy(
182
+ session: Session,
183
+ producer_role: str,
184
+ consumer_role: Optional[str],
185
+ ) -> None:
186
+ """
187
+ Create Feature Store roles and configure role hierarchy. Must be run with
188
+ ACCOUNTADMIN or a role with `CREATE ROLE` privilege.
189
+
190
+ See https://docs.snowflake.com/en/sql-reference/sql/grant-privilege for more information
191
+ about privilege grants in Snowflake.
192
+
193
+ Args:
194
+ session: Snowpark Session to interact with Snowflake backend.
195
+ producer_role: Name of producer role to be configured.
196
+ consumer_role: Name of consumer role to be configured.
197
+ """
198
+ # Create the necessary roles and build role hierarchy
199
+ producer_role = SqlIdentifier(producer_role)
200
+ session.sql(f"CREATE ROLE IF NOT EXISTS {producer_role}").collect()
201
+ session.sql(f"GRANT ROLE {producer_role} TO ROLE SYSADMIN").collect()
202
+ session.sql(f"GRANT ROLE {producer_role} TO ROLE {session.get_current_role()}").collect()
203
+
204
+ if consumer_role is not None:
205
+ consumer_role = SqlIdentifier(consumer_role)
206
+ session.sql(f"CREATE ROLE IF NOT EXISTS {consumer_role}").collect()
207
+ session.sql(f"GRANT ROLE {consumer_role} TO ROLE {producer_role}").collect()
208
+
209
+
210
+ @telemetry.send_api_usage_telemetry(project=_PROJECT)
211
+ def setup_feature_store(
212
+ session: Session,
213
+ database: str,
214
+ schema: str,
215
+ warehouse: str,
216
+ producer_role: str = "FS_PRODUCER",
217
+ consumer_role: Optional[str] = None,
218
+ ) -> FeatureStore:
219
+ """
220
+ Sets up a new Feature Store including role/privilege setup. Must be run with ACCOUNTADMIN
221
+ or a role with `MANAGE GRANTS` and `CREATE ROLE` privileges.
222
+
223
+ See https://docs.snowflake.com/en/sql-reference/sql/grant-privilege for more information
224
+ about privilege grants in Snowflake.
225
+
226
+ Args:
227
+ session: Snowpark Session to interact with Snowflake backend.
228
+ database: Database to create the FeatureStore instance.
229
+ schema: Schema to create the FeatureStore instance.
230
+ warehouse: Default warehouse for Feature Store compute.
231
+ producer_role: Name of producer role to be configured.
232
+ consumer_role: Name of consumer role to be configured. If not specified, consumer role won't be created.
233
+
234
+ Returns:
235
+ Feature Store instance.
236
+
237
+ Raises:
238
+ exceptions.SnowparkSQLException: Insufficient privileges.
239
+ """
240
+
241
+ database = SqlIdentifier(database)
242
+ schema = SqlIdentifier(schema)
243
+ warehouse = SqlIdentifier(warehouse)
244
+ session_info = _SessionInfo(
245
+ SqlIdentifier(database),
246
+ SqlIdentifier(schema),
247
+ SqlIdentifier(warehouse),
248
+ )
249
+
250
+ try:
251
+ roles_to_create = {_FeatureStoreRole.PRODUCER: producer_role}
252
+ if consumer_role is not None:
253
+ roles_to_create.update({_FeatureStoreRole.CONSUMER: consumer_role})
254
+ _configure_role_hierarchy(session, producer_role=producer_role, consumer_role=consumer_role)
255
+ except exceptions.SnowparkSQLException:
256
+ # Error can be safely ignored if roles already exist and hierarchy is already built
257
+ for _, role in roles_to_create.items():
258
+ # Ensure roles already exist
259
+ if session.sql(f"SHOW ROLES LIKE '{role}' STARTS WITH '{role}'").count() == 0:
260
+ raise
261
+
262
+ if consumer_role is not None:
263
+ # Ensure hierarchy already configured
264
+ consumer_grants = session.sql(f"SHOW GRANTS ON ROLE {consumer_role}").collect()
265
+ if not any(r["granted_to"] == "ROLE" and r["grantee_name"] == producer_role for r in consumer_grants):
266
+ raise
267
+
268
+ # Do any pre-FeatureStore.__init__() privilege setup
269
+ _configure_pre_init_privileges(session, session_info, roles_to_create)
270
+
271
+ # Use PRODUCER role to create and operate new Feature Store
272
+ current_role = session.get_current_role()
273
+ assert current_role is not None # to make mypy happy
274
+ try:
275
+ session.use_role(producer_role)
276
+ fs = FeatureStore(session, database, schema, warehouse, creation_mode=CreationMode.CREATE_IF_NOT_EXIST)
277
+ finally:
278
+ session.use_role(current_role)
279
+
280
+ # Do any post-FeatureStore.__init__() privilege setup
281
+ _configure_post_init_privileges(session, session_info, roles_to_create)
282
+
283
+ return fs