snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +151 -78
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +2 -50
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -18,7 +18,7 @@ from snowflake.ml._internal.exceptions import (
|
|
18
18
|
exceptions as snowml_exceptions,
|
19
19
|
)
|
20
20
|
from snowflake.ml._internal.utils import query_result_checker
|
21
|
-
from snowflake.snowpark import session
|
21
|
+
from snowflake.snowpark import context, exceptions, session
|
22
22
|
from snowflake.snowpark._internal import utils as snowpark_utils
|
23
23
|
|
24
24
|
|
@@ -332,6 +332,16 @@ def get_matched_package_versions_in_snowflake_conda_channel(
|
|
332
332
|
return matched_versions
|
333
333
|
|
334
334
|
|
335
|
+
def get_matched_package_versions_in_information_schema_with_active_session(
|
336
|
+
reqs: List[requirements.Requirement], python_version: str
|
337
|
+
) -> Dict[str, List[version.Version]]:
|
338
|
+
try:
|
339
|
+
session = context.get_active_session()
|
340
|
+
except exceptions.SnowparkSessionException:
|
341
|
+
return {}
|
342
|
+
return get_matched_package_versions_in_information_schema(session, reqs, python_version)
|
343
|
+
|
344
|
+
|
335
345
|
def get_matched_package_versions_in_information_schema(
|
336
346
|
session: session.Session, reqs: List[requirements.Requirement], python_version: str
|
337
347
|
) -> Dict[str, List[version.Version]]:
|
@@ -338,4 +338,6 @@ def resolve_identifier(name: str) -> str:
|
|
338
338
|
elif UNQUOTED_CASE_INSENSITIVE_RE.match(name):
|
339
339
|
return name.upper()
|
340
340
|
else:
|
341
|
-
raise ValueError(
|
341
|
+
raise ValueError(
|
342
|
+
f"{name} is not a valid SQL identifier: https://docs.snowflake.com/en/sql-reference/identifiers-syntax"
|
343
|
+
)
|
@@ -27,15 +27,11 @@ class SqlIdentifier(str):
|
|
27
27
|
automatically added if necessary to make sure the original input's cases are preserved.
|
28
28
|
Default to False.
|
29
29
|
|
30
|
-
Raises:
|
31
|
-
ValueError: input name is not a valid identifier.
|
32
|
-
|
33
30
|
Returns:
|
34
31
|
Returns new instance created.
|
35
32
|
"""
|
36
|
-
|
37
|
-
|
38
|
-
raise ValueError(f"name:`{name}` is not a valid identifier.")
|
33
|
+
assert name is not None
|
34
|
+
|
39
35
|
if case_sensitive:
|
40
36
|
return super().__new__(cls, identifier.get_inferred_name(name))
|
41
37
|
else:
|
@@ -52,6 +52,15 @@ _ENTITY_TAG_PREFIX = "SNOWML_FEATURE_STORE_ENTITY_"
|
|
52
52
|
_FEATURE_VIEW_ENTITY_TAG = "SNOWML_FEATURE_STORE_FV_ENTITIES"
|
53
53
|
_FEATURE_VIEW_TS_COL_TAG = "SNOWML_FEATURE_STORE_FV_TS_COL"
|
54
54
|
_FEATURE_STORE_OBJECT_TAG = "SNOWML_FEATURE_STORE_OBJECT"
|
55
|
+
|
56
|
+
|
57
|
+
# TODO: remove "" after dataset is updated
|
58
|
+
class _FeatureStoreObjTypes(Enum):
|
59
|
+
FEATURE_VIEW = "FEATURE_VIEW"
|
60
|
+
FEATURE_VIEW_REFRESH_TASK = "FEATURE_VIEW_REFRESH_TASK"
|
61
|
+
TRAINING_DATA = ""
|
62
|
+
|
63
|
+
|
55
64
|
_PROJECT = "FeatureStore"
|
56
65
|
_DT_OR_VIEW_QUERY_PATTERN = re.compile(
|
57
66
|
r"""CREATE\ (OR\ REPLACE\ )?(?P<obj_type>(DYNAMIC\ TABLE|VIEW))\ .*
|
@@ -143,14 +152,17 @@ class FeatureStore:
|
|
143
152
|
database: Database to create the FeatureStore instance.
|
144
153
|
name: Target FeatureStore name, maps to a schema in the database.
|
145
154
|
default_warehouse: Default warehouse for feature store compute.
|
146
|
-
creation_mode:
|
155
|
+
creation_mode: If FAIL_IF_NOT_EXIST, feature store throws when required resources not already exist; If
|
156
|
+
CREATE_IF_NOT_EXIST, feature store will create required resources if they not already exist. Required
|
157
|
+
resources include schema and tags. Note database must already exist in either mode.
|
147
158
|
|
148
159
|
Raises:
|
149
160
|
SnowflakeMLException: [ValueError] default_warehouse does not exist.
|
150
|
-
SnowflakeMLException: [ValueError]
|
161
|
+
SnowflakeMLException: [ValueError] Required resources not exist when mode is FAIL_IF_NOT_EXIST.
|
151
162
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
152
163
|
SnowflakeMLException: [RuntimeError] Failed to create feature store.
|
153
164
|
"""
|
165
|
+
|
154
166
|
database = SqlIdentifier(database)
|
155
167
|
name = SqlIdentifier(name)
|
156
168
|
|
@@ -177,18 +189,12 @@ class FeatureStore:
|
|
177
189
|
|
178
190
|
self.update_default_warehouse(default_warehouse)
|
179
191
|
|
192
|
+
self._check_database_exists_or_throw()
|
180
193
|
if creation_mode == CreationMode.FAIL_IF_NOT_EXIST:
|
181
|
-
|
182
|
-
|
183
|
-
raise snowml_exceptions.SnowflakeMLException(
|
184
|
-
error_code=error_codes.NOT_FOUND,
|
185
|
-
original_exception=ValueError(f"Feature store {name} does not exist."),
|
186
|
-
)
|
194
|
+
self._check_internal_objects_exist_or_throw()
|
195
|
+
|
187
196
|
else:
|
188
197
|
try:
|
189
|
-
self._session.sql(f"CREATE DATABASE IF NOT EXISTS {self._config.database}").collect(
|
190
|
-
statement_params=self._telemetry_stmp
|
191
|
-
)
|
192
198
|
self._session.sql(f"CREATE SCHEMA IF NOT EXISTS {self._config.full_schema_path}").collect(
|
193
199
|
statement_params=self._telemetry_stmp
|
194
200
|
)
|
@@ -196,12 +202,16 @@ class FeatureStore:
|
|
196
202
|
[
|
197
203
|
_FEATURE_VIEW_ENTITY_TAG,
|
198
204
|
_FEATURE_VIEW_TS_COL_TAG,
|
199
|
-
_FEATURE_STORE_OBJECT_TAG,
|
200
205
|
]
|
201
206
|
):
|
202
207
|
self._session.sql(f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(tag)}").collect(
|
203
208
|
statement_params=self._telemetry_stmp
|
204
209
|
)
|
210
|
+
|
211
|
+
self._session.sql(
|
212
|
+
f"""CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}
|
213
|
+
ALLOWED_VALUES {','.join([f"'{v.value}'" for v in _FeatureStoreObjTypes])}"""
|
214
|
+
).collect(statement_params=self._telemetry_stmp)
|
205
215
|
except Exception as e:
|
206
216
|
self.clear()
|
207
217
|
raise snowml_exceptions.SnowflakeMLException(
|
@@ -244,17 +254,18 @@ class FeatureStore:
|
|
244
254
|
A registered entity object.
|
245
255
|
|
246
256
|
Raises:
|
247
|
-
SnowflakeMLException: [ValueError] Entity with same name is already registered.
|
248
257
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
249
258
|
"""
|
259
|
+
|
250
260
|
tag_name = self._get_entity_name(entity.name)
|
251
261
|
found_rows = self._find_object("TAGS", tag_name)
|
252
262
|
if len(found_rows) > 0:
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
263
|
+
warnings.warn(
|
264
|
+
f"Entity {entity.name} already exists. Skip registration.",
|
265
|
+
stacklevel=2,
|
266
|
+
category=UserWarning,
|
257
267
|
)
|
268
|
+
return entity
|
258
269
|
|
259
270
|
# allowed_values will add double-quotes around each value, thus use resolved str here.
|
260
271
|
join_keys = [f"'{key.resolved()}'" for key in entity.join_keys]
|
@@ -284,7 +295,7 @@ class FeatureStore:
|
|
284
295
|
feature_view: FeatureView,
|
285
296
|
version: str,
|
286
297
|
block: bool = True,
|
287
|
-
|
298
|
+
overwrite: bool = False,
|
288
299
|
) -> FeatureView:
|
289
300
|
"""
|
290
301
|
Materialize a FeatureView to Snowflake backend.
|
@@ -304,16 +315,14 @@ class FeatureStore:
|
|
304
315
|
NOTE: Version only accepts letters, numbers and underscore. Also version will be capitalized.
|
305
316
|
block: Specify whether the FeatureView backend materialization should be blocking or not. If blocking then
|
306
317
|
the API will wait until the initial FeatureView data is generated. Default to true.
|
307
|
-
|
308
|
-
first then recreate. NOTE: there will be backfill cost associated if the FeatureView is
|
309
|
-
continuously maintained.
|
318
|
+
overwrite: Overwrite the existing FeatureView with same version. This is the same as dropping the
|
319
|
+
FeatureView first then recreate. NOTE: there will be backfill cost associated if the FeatureView is
|
320
|
+
being continuously maintained.
|
310
321
|
|
311
322
|
Returns:
|
312
323
|
A materialized FeatureView object.
|
313
324
|
|
314
325
|
Raises:
|
315
|
-
SnowflakeMLException: [ValueError] FeatureView is already registered, or duplicate name and version
|
316
|
-
are detected.
|
317
326
|
SnowflakeMLException: [ValueError] FeatureView entity has not been registered.
|
318
327
|
SnowflakeMLException: [ValueError] Warehouse or default warehouse is not specified.
|
319
328
|
SnowflakeMLException: [RuntimeError] Failed to create dynamic table, task, or view.
|
@@ -322,13 +331,16 @@ class FeatureStore:
|
|
322
331
|
version = FeatureViewVersion(version)
|
323
332
|
|
324
333
|
if feature_view.status != FeatureViewStatus.DRAFT:
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
334
|
+
try:
|
335
|
+
return self._get_feature_view_if_exists(feature_view.name, str(version))
|
336
|
+
except Exception:
|
337
|
+
raise snowml_exceptions.SnowflakeMLException(
|
338
|
+
error_code=error_codes.NOT_FOUND,
|
339
|
+
original_exception=ValueError(
|
340
|
+
f"FeatureView {feature_view.name}/{feature_view.version} status is {feature_view.status}, "
|
341
|
+
+ "but it doesn't exist."
|
342
|
+
),
|
343
|
+
)
|
332
344
|
|
333
345
|
# TODO: ideally we should move this to FeatureView creation time
|
334
346
|
for e in feature_view.entities:
|
@@ -339,15 +351,11 @@ class FeatureStore:
|
|
339
351
|
)
|
340
352
|
|
341
353
|
feature_view_name = FeatureView._get_physical_name(feature_view.name, version)
|
342
|
-
if not
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
error_code=error_codes.OBJECT_ALREADY_EXISTS,
|
348
|
-
original_exception=ValueError(f"FeatureView {feature_view.name}/{version} already exists."),
|
349
|
-
suppress_source_trace=True,
|
350
|
-
)
|
354
|
+
if not overwrite:
|
355
|
+
try:
|
356
|
+
return self._get_feature_view_if_exists(feature_view.name, str(version))
|
357
|
+
except Exception:
|
358
|
+
pass
|
351
359
|
|
352
360
|
fully_qualified_name = self._get_fully_qualified_name(feature_view_name)
|
353
361
|
entities = _FEATURE_VIEW_ENTITY_TAG_DELIMITER.join([e.name for e in feature_view.entities])
|
@@ -377,17 +385,17 @@ class FeatureStore:
|
|
377
385
|
self._default_warehouse,
|
378
386
|
timestamp_col,
|
379
387
|
block,
|
380
|
-
|
388
|
+
overwrite,
|
381
389
|
)
|
382
390
|
else:
|
383
391
|
try:
|
384
|
-
|
385
|
-
query = f"""CREATE{
|
392
|
+
overwrite_clause = " OR REPLACE" if overwrite else ""
|
393
|
+
query = f"""CREATE{overwrite_clause} VIEW {fully_qualified_name} ({column_descs})
|
386
394
|
COMMENT = '{feature_view.desc}'
|
387
395
|
TAG (
|
388
396
|
{_FEATURE_VIEW_ENTITY_TAG} = '{entities}',
|
389
397
|
{_FEATURE_VIEW_TS_COL_TAG} = '{timestamp_col}',
|
390
|
-
{_FEATURE_STORE_OBJECT_TAG} = ''
|
398
|
+
{_FEATURE_STORE_OBJECT_TAG} = '{_FeatureStoreObjTypes.FEATURE_VIEW.value}'
|
391
399
|
)
|
392
400
|
AS {feature_view.query}
|
393
401
|
"""
|
@@ -402,41 +410,51 @@ class FeatureStore:
|
|
402
410
|
return self.get_feature_view(feature_view.name, str(version))
|
403
411
|
|
404
412
|
@dispatch_decorator(prpr_version="1.1.0")
|
405
|
-
def update_feature_view(
|
413
|
+
def update_feature_view(
|
414
|
+
self, name: str, version: str, refresh_freq: Optional[str] = None, warehouse: Optional[str] = None
|
415
|
+
) -> FeatureView:
|
406
416
|
"""Update a registered feature view.
|
407
417
|
Check feature_view.py for which fields are allowed to be updated after registration.
|
408
418
|
|
409
419
|
Args:
|
410
|
-
|
420
|
+
name: name of the FeatureView to be updated.
|
421
|
+
version: version of the FeatureView to be updated.
|
422
|
+
refresh_freq: updated refresh frequency.
|
423
|
+
warehouse: updated warehouse.
|
424
|
+
|
425
|
+
Returns:
|
426
|
+
Updated FeatureView.
|
411
427
|
|
412
428
|
Raises:
|
413
|
-
SnowflakeMLException: [RuntimeError]
|
429
|
+
SnowflakeMLException: [RuntimeError] If FeatureView is not managed and refresh_freq is defined.
|
414
430
|
SnowflakeMLException: [RuntimeError] Failed to update feature view.
|
415
431
|
"""
|
416
|
-
|
432
|
+
feature_view = self.get_feature_view(name=name, version=version)
|
433
|
+
if refresh_freq is not None and feature_view.status == FeatureViewStatus.STATIC:
|
417
434
|
full_name = f"{feature_view.name}/{feature_view.version}"
|
418
435
|
raise snowml_exceptions.SnowflakeMLException(
|
419
436
|
error_code=error_codes.INVALID_ARGUMENT,
|
420
|
-
original_exception=RuntimeError(
|
421
|
-
f"Feature view {full_name} must be registered and non-static so that can be updated."
|
422
|
-
),
|
437
|
+
original_exception=RuntimeError(f"Feature view {full_name} must be non-static so that can be updated."),
|
423
438
|
)
|
424
439
|
|
425
|
-
if feature_view.
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
+
warehouse = SqlIdentifier(warehouse) if warehouse else feature_view.warehouse
|
441
|
+
|
442
|
+
# TODO(@wezhou): we need to properly handle cron expr
|
443
|
+
try:
|
444
|
+
self._session.sql(
|
445
|
+
f"""ALTER DYNAMIC TABLE {feature_view.fully_qualified_name()} SET
|
446
|
+
TARGET_LAG = '{refresh_freq or feature_view.refresh_freq}'
|
447
|
+
WAREHOUSE = {warehouse}
|
448
|
+
"""
|
449
|
+
).collect(statement_params=self._telemetry_stmp)
|
450
|
+
except Exception as e:
|
451
|
+
raise snowml_exceptions.SnowflakeMLException(
|
452
|
+
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
453
|
+
original_exception=RuntimeError(
|
454
|
+
f"Update feature view {feature_view.name}/{feature_view.version} failed: {e}"
|
455
|
+
),
|
456
|
+
) from e
|
457
|
+
return self.get_feature_view(name=name, version=version)
|
440
458
|
|
441
459
|
@dispatch_decorator(prpr_version="1.0.8")
|
442
460
|
def read_feature_view(self, feature_view: FeatureView) -> DataFrame:
|
@@ -937,6 +955,16 @@ class FeatureStore:
|
|
937
955
|
) from e
|
938
956
|
logger.info(f"Feature store {self._config.full_schema_path} has been cleared.")
|
939
957
|
|
958
|
+
def _get_feature_view_if_exists(self, name: str, version: str) -> FeatureView:
|
959
|
+
existing_fv = self.get_feature_view(name, version)
|
960
|
+
warnings.warn(
|
961
|
+
f"FeatureView {name}/{version} already exists. Skip registration."
|
962
|
+
+ " Set `overwrite` to True if you want to replace existing FeatureView.",
|
963
|
+
stacklevel=2,
|
964
|
+
category=UserWarning,
|
965
|
+
)
|
966
|
+
return existing_fv
|
967
|
+
|
940
968
|
def _create_dynamic_table(
|
941
969
|
self,
|
942
970
|
feature_view_name: SqlIdentifier,
|
@@ -951,19 +979,20 @@ class FeatureStore:
|
|
951
979
|
override: bool,
|
952
980
|
) -> None:
|
953
981
|
# TODO: cluster by join keys once DT supports that
|
982
|
+
override_clause = " OR REPLACE" if override else ""
|
983
|
+
query = f"""CREATE{override_clause} DYNAMIC TABLE {fully_qualified_name} ({column_descs})
|
984
|
+
TARGET_LAG = '{'DOWNSTREAM' if schedule_task else feature_view.refresh_freq}'
|
985
|
+
COMMENT = '{feature_view.desc}'
|
986
|
+
TAG (
|
987
|
+
{self._get_fully_qualified_name(_FEATURE_VIEW_ENTITY_TAG)} = '{entities}',
|
988
|
+
{self._get_fully_qualified_name(_FEATURE_VIEW_TS_COL_TAG)} = '{timestamp_col}',
|
989
|
+
{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} =
|
990
|
+
'{_FeatureStoreObjTypes.FEATURE_VIEW.value}'
|
991
|
+
)
|
992
|
+
WAREHOUSE = {warehouse}
|
993
|
+
AS {feature_view.query}
|
994
|
+
"""
|
954
995
|
try:
|
955
|
-
override_clause = " OR REPLACE" if override else ""
|
956
|
-
query = f"""CREATE{override_clause} DYNAMIC TABLE {fully_qualified_name} ({column_descs})
|
957
|
-
TARGET_LAG = '{'DOWNSTREAM' if schedule_task else feature_view.refresh_freq}'
|
958
|
-
COMMENT = '{feature_view.desc}'
|
959
|
-
TAG (
|
960
|
-
{self._get_fully_qualified_name(_FEATURE_VIEW_ENTITY_TAG)} = '{entities}',
|
961
|
-
{self._get_fully_qualified_name(_FEATURE_VIEW_TS_COL_TAG)} = '{timestamp_col}',
|
962
|
-
{self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)} = ''
|
963
|
-
)
|
964
|
-
WAREHOUSE = {warehouse}
|
965
|
-
AS {feature_view.query}
|
966
|
-
"""
|
967
996
|
self._session.sql(query).collect(block=block, statement_params=self._telemetry_stmp)
|
968
997
|
|
969
998
|
if schedule_task:
|
@@ -978,7 +1007,8 @@ class FeatureStore:
|
|
978
1007
|
self._session.sql(
|
979
1008
|
f"""
|
980
1009
|
ALTER TASK {fully_qualified_name}
|
981
|
-
SET TAG {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}
|
1010
|
+
SET TAG {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}
|
1011
|
+
='{_FeatureStoreObjTypes.FEATURE_VIEW_REFRESH_TASK.value}'
|
982
1012
|
"""
|
983
1013
|
).collect(statement_params=self._telemetry_stmp)
|
984
1014
|
self._session.sql(f"ALTER TASK {fully_qualified_name} RESUME").collect(
|
@@ -988,6 +1018,9 @@ class FeatureStore:
|
|
988
1018
|
self._session.sql(f"DROP DYNAMIC TABLE IF EXISTS {fully_qualified_name}").collect(
|
989
1019
|
statement_params=self._telemetry_stmp
|
990
1020
|
)
|
1021
|
+
self._session.sql(f"DROP TASK IF EXISTS {fully_qualified_name}").collect(
|
1022
|
+
statement_params=self._telemetry_stmp
|
1023
|
+
)
|
991
1024
|
raise
|
992
1025
|
except Exception as e:
|
993
1026
|
raise snowml_exceptions.SnowflakeMLException(
|
@@ -1168,6 +1201,46 @@ class FeatureStore:
|
|
1168
1201
|
|
1169
1202
|
return self._session.sql(query), join_keys
|
1170
1203
|
|
1204
|
+
def _check_database_exists_or_throw(self) -> None:
|
1205
|
+
resolved_db_name = self._config.database.resolved()
|
1206
|
+
dbs = self._session.sql(
|
1207
|
+
f"""
|
1208
|
+
SHOW DATABASES LIKE '{resolved_db_name}' STARTS WITH '{resolved_db_name}'
|
1209
|
+
"""
|
1210
|
+
).collect(statement_params=self._telemetry_stmp)
|
1211
|
+
if len(dbs) == 0:
|
1212
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1213
|
+
error_code=error_codes.NOT_FOUND,
|
1214
|
+
original_exception=ValueError(f"Database {resolved_db_name} does not exist."),
|
1215
|
+
)
|
1216
|
+
|
1217
|
+
def _check_internal_objects_exist_or_throw(self) -> None:
|
1218
|
+
schema_result = self._find_object("SCHEMAS", self._config.schema)
|
1219
|
+
if len(schema_result) == 0:
|
1220
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1221
|
+
error_code=error_codes.NOT_FOUND,
|
1222
|
+
original_exception=ValueError(
|
1223
|
+
f"Feature store schema {self._config.schema} does not exist. "
|
1224
|
+
"Use CreationMode.CREATE_IF_NOT_EXIST mode instead if you want to create one."
|
1225
|
+
),
|
1226
|
+
)
|
1227
|
+
for tag_name in to_sql_identifiers(
|
1228
|
+
[
|
1229
|
+
_FEATURE_STORE_OBJECT_TAG,
|
1230
|
+
_FEATURE_VIEW_ENTITY_TAG,
|
1231
|
+
_FEATURE_VIEW_TS_COL_TAG,
|
1232
|
+
]
|
1233
|
+
):
|
1234
|
+
tag_result = self._find_object("TAGS", tag_name)
|
1235
|
+
if len(tag_result) == 0:
|
1236
|
+
raise snowml_exceptions.SnowflakeMLException(
|
1237
|
+
error_code=error_codes.NOT_FOUND,
|
1238
|
+
original_exception=ValueError(
|
1239
|
+
f"Feature store internal tag {tag_name} does not exist. "
|
1240
|
+
"Use CreationMode.CREATE_IF_NOT_EXIST mode instead if you want to create one."
|
1241
|
+
),
|
1242
|
+
)
|
1243
|
+
|
1171
1244
|
def _is_asof_join_enabled(self) -> bool:
|
1172
1245
|
result = None
|
1173
1246
|
try:
|
@@ -1,7 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import json
|
4
|
-
import re
|
5
4
|
from collections import OrderedDict
|
6
5
|
from dataclasses import dataclass
|
7
6
|
from enum import Enum
|
@@ -29,22 +28,19 @@ from snowflake.snowpark.types import (
|
|
29
28
|
_FEATURE_VIEW_NAME_DELIMITER = "$"
|
30
29
|
_TIMESTAMP_COL_PLACEHOLDER = "FS_TIMESTAMP_COL_PLACEHOLDER_VAL"
|
31
30
|
_FEATURE_OBJ_TYPE = "FEATURE_OBJ_TYPE"
|
32
|
-
_FEATURE_VIEW_VERSION_RE = re.compile("^([A-Za-z0-9_]*)$")
|
33
31
|
|
34
32
|
|
35
|
-
class FeatureViewVersion(
|
33
|
+
class FeatureViewVersion(SqlIdentifier):
|
36
34
|
def __new__(cls, version: str) -> FeatureViewVersion:
|
37
|
-
if
|
35
|
+
if _FEATURE_VIEW_NAME_DELIMITER in version:
|
38
36
|
raise snowml_exceptions.SnowflakeMLException(
|
39
37
|
error_code=error_codes.INVALID_ARGUMENT,
|
40
|
-
original_exception=ValueError(
|
41
|
-
f"`{version}` is not a valid feature view version. Only letter, number and underscore is allowed."
|
42
|
-
),
|
38
|
+
original_exception=ValueError(f"{_FEATURE_VIEW_NAME_DELIMITER} is not allowed in version: {version}."),
|
43
39
|
)
|
44
|
-
return super().__new__(cls, version
|
40
|
+
return super().__new__(cls, version) # type: ignore[return-value]
|
45
41
|
|
46
42
|
def __init__(self, version: str) -> None:
|
47
|
-
|
43
|
+
super().__init__(version)
|
48
44
|
|
49
45
|
|
50
46
|
class FeatureViewStatus(Enum):
|
@@ -164,27 +160,19 @@ class FeatureView:
|
|
164
160
|
res.append(name)
|
165
161
|
return FeatureViewSlice(self, res)
|
166
162
|
|
167
|
-
def physical_name(self) -> SqlIdentifier:
|
168
|
-
"""Returns the physical name for this feature in Snowflake.
|
169
|
-
|
170
|
-
Returns:
|
171
|
-
Physical name string.
|
172
|
-
|
173
|
-
Raises:
|
174
|
-
RuntimeError: if the FeatureView is not materialized.
|
175
|
-
"""
|
176
|
-
if self.status == FeatureViewStatus.DRAFT or self.version is None:
|
177
|
-
raise RuntimeError(f"FeatureView {self.name} has not been materialized.")
|
178
|
-
return FeatureView._get_physical_name(self.name, self.version)
|
179
|
-
|
180
163
|
def fully_qualified_name(self) -> str:
|
181
164
|
"""Returns the fully qualified name (<database_name>.<schema_name>.<feature_view_name>) for the
|
182
165
|
FeatureView in Snowflake.
|
183
166
|
|
184
167
|
Returns:
|
185
168
|
fully qualified name string.
|
169
|
+
|
170
|
+
Raises:
|
171
|
+
RuntimeError: if the FeatureView is not registered.
|
186
172
|
"""
|
187
|
-
|
173
|
+
if self.status == FeatureViewStatus.DRAFT or self.version is None:
|
174
|
+
raise RuntimeError(f"FeatureView {self.name} has not been registered.")
|
175
|
+
return f"{self._database}.{self._schema}.{FeatureView._get_physical_name(self.name, self.version)}"
|
188
176
|
|
189
177
|
def attach_feature_desc(self, descs: Dict[str, str]) -> FeatureView:
|
190
178
|
"""
|
@@ -386,7 +374,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
386
374
|
def to_df(self, session: Session) -> DataFrame:
|
387
375
|
values = list(self._to_dict().values())
|
388
376
|
schema = [x.lstrip("_") for x in list(self._to_dict().keys())]
|
389
|
-
values.append(str(self.
|
377
|
+
values.append(str(FeatureView._get_physical_name(self._name, self._version))) # type: ignore[arg-type]
|
390
378
|
schema.append("physical_name")
|
391
379
|
return session.create_dataframe([values], schema=schema)
|
392
380
|
|