snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_sentiment.py +7 -4
- snowflake/ml/_internal/env_utils.py +6 -0
- snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
- snowflake/ml/_internal/telemetry.py +1 -0
- snowflake/ml/_internal/utils/identifier.py +1 -1
- snowflake/ml/_internal/utils/sql_identifier.py +14 -1
- snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
- snowflake/ml/dataset/__init__.py +2 -1
- snowflake/ml/dataset/dataset.py +4 -3
- snowflake/ml/dataset/dataset_reader.py +5 -8
- snowflake/ml/feature_store/__init__.py +6 -0
- snowflake/ml/feature_store/access_manager.py +283 -0
- snowflake/ml/feature_store/feature_store.py +160 -100
- snowflake/ml/feature_store/feature_view.py +30 -19
- snowflake/ml/fileset/embedded_stage_fs.py +15 -12
- snowflake/ml/fileset/snowfs.py +2 -30
- snowflake/ml/fileset/stage_fs.py +25 -7
- snowflake/ml/model/_client/model/model_impl.py +46 -39
- snowflake/ml/model/_client/model/model_version_impl.py +24 -2
- snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
- snowflake/ml/model/_client/ops/model_ops.py +174 -16
- snowflake/ml/model/_client/sql/_base.py +34 -0
- snowflake/ml/model/_client/sql/model.py +32 -39
- snowflake/ml/model/_client/sql/model_version.py +111 -42
- snowflake/ml/model/_client/sql/stage.py +6 -32
- snowflake/ml/model/_client/sql/tag.py +32 -56
- snowflake/ml/model/_model_composer/model_composer.py +8 -4
- snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +90 -142
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +81 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
- snowflake/ml/modeling/cluster/birch.py +8 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
- snowflake/ml/modeling/cluster/dbscan.py +8 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
- snowflake/ml/modeling/cluster/k_means.py +8 -1
- snowflake/ml/modeling/cluster/mean_shift.py +8 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
- snowflake/ml/modeling/cluster/optics.py +8 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
- snowflake/ml/modeling/compose/column_transformer.py +8 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
- snowflake/ml/modeling/covariance/oas.py +8 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
- snowflake/ml/modeling/decomposition/pca.py +8 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
- snowflake/ml/modeling/framework/base.py +4 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
- snowflake/ml/modeling/impute/knn_imputer.py +8 -1
- snowflake/ml/modeling/impute/missing_indicator.py +8 -1
- snowflake/ml/modeling/impute/simple_imputer.py +21 -2
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/lars.py +8 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/perceptron.py +8 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/ridge.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
- snowflake/ml/modeling/manifold/isomap.py +8 -1
- snowflake/ml/modeling/manifold/mds.py +8 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
- snowflake/ml/modeling/manifold/tsne.py +8 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
- snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
- snowflake/ml/modeling/pipeline/pipeline.py +27 -7
- snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
- snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
- snowflake/ml/modeling/svm/linear_svc.py +8 -1
- snowflake/ml/modeling/svm/linear_svr.py +8 -1
- snowflake/ml/modeling/svm/nu_svc.py +8 -1
- snowflake/ml/modeling/svm/nu_svr.py +8 -1
- snowflake/ml/modeling/svm/svc.py +8 -1
- snowflake/ml/modeling/svm/svr.py +8 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
- snowflake/ml/registry/_manager/model_manager.py +95 -8
- snowflake/ml/registry/registry.py +10 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +66 -10
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +196 -192
- snowflake/ml/_internal/lineage/dataset_dataframe.py +0 -44
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -8,7 +8,19 @@ import re
|
|
8
8
|
import warnings
|
9
9
|
from dataclasses import dataclass
|
10
10
|
from enum import Enum
|
11
|
-
from typing import
|
11
|
+
from typing import (
|
12
|
+
Any,
|
13
|
+
Callable,
|
14
|
+
Dict,
|
15
|
+
List,
|
16
|
+
Literal,
|
17
|
+
Optional,
|
18
|
+
Tuple,
|
19
|
+
TypeVar,
|
20
|
+
Union,
|
21
|
+
cast,
|
22
|
+
overload,
|
23
|
+
)
|
12
24
|
|
13
25
|
import packaging.version as pkg_version
|
14
26
|
import snowflake.ml.version as snowml_version
|
@@ -32,7 +44,7 @@ from snowflake.ml.feature_store.entity import _ENTITY_NAME_LENGTH_LIMIT, Entity
|
|
32
44
|
from snowflake.ml.feature_store.feature_view import (
|
33
45
|
_FEATURE_OBJ_TYPE,
|
34
46
|
_FEATURE_VIEW_NAME_DELIMITER,
|
35
|
-
|
47
|
+
_LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS,
|
36
48
|
FeatureView,
|
37
49
|
FeatureViewSlice,
|
38
50
|
FeatureViewStatus,
|
@@ -242,23 +254,16 @@ class FeatureStore:
|
|
242
254
|
|
243
255
|
else:
|
244
256
|
try:
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
]
|
252
|
-
):
|
257
|
+
# Explicitly check if schema exists first since we may not have CREATE SCHEMA privilege
|
258
|
+
if len(self._find_object("SCHEMAS", self._config.schema)) == 0:
|
259
|
+
self._session.sql(f"CREATE SCHEMA IF NOT EXISTS {self._config.full_schema_path}").collect(
|
260
|
+
statement_params=self._telemetry_stmp
|
261
|
+
)
|
262
|
+
for tag in to_sql_identifiers([_FEATURE_VIEW_METADATA_TAG, _FEATURE_STORE_OBJECT_TAG]):
|
253
263
|
self._session.sql(f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(tag)}").collect(
|
254
264
|
statement_params=self._telemetry_stmp
|
255
265
|
)
|
256
|
-
|
257
|
-
self._session.sql(
|
258
|
-
f"CREATE TAG IF NOT EXISTS {self._get_fully_qualified_name(_FEATURE_STORE_OBJECT_TAG)}"
|
259
|
-
).collect(statement_params=self._telemetry_stmp)
|
260
266
|
except Exception as e:
|
261
|
-
self.clear()
|
262
267
|
raise snowml_exceptions.SnowflakeMLException(
|
263
268
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
264
269
|
original_exception=RuntimeError(f"Failed to create feature store {name}: {e}."),
|
@@ -750,7 +755,7 @@ class FeatureStore:
|
|
750
755
|
except Exception as e:
|
751
756
|
raise snowml_exceptions.SnowflakeMLException(
|
752
757
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
753
|
-
original_exception=RuntimeError(f"Failed to
|
758
|
+
original_exception=RuntimeError(f"Failed to delete entity: {e}."),
|
754
759
|
) from e
|
755
760
|
logger.info(f"Deleted Entity {name}.")
|
756
761
|
|
@@ -802,7 +807,7 @@ class FeatureStore:
|
|
802
807
|
|
803
808
|
return df
|
804
809
|
|
805
|
-
@
|
810
|
+
@overload
|
806
811
|
def generate_dataset(
|
807
812
|
self,
|
808
813
|
name: str,
|
@@ -814,7 +819,40 @@ class FeatureStore:
|
|
814
819
|
exclude_columns: Optional[List[str]] = None,
|
815
820
|
include_feature_view_timestamp_col: bool = False,
|
816
821
|
desc: str = "",
|
822
|
+
output_type: Literal["dataset"] = "dataset",
|
817
823
|
) -> dataset.Dataset:
|
824
|
+
...
|
825
|
+
|
826
|
+
@overload
|
827
|
+
def generate_dataset(
|
828
|
+
self,
|
829
|
+
name: str,
|
830
|
+
spine_df: DataFrame,
|
831
|
+
features: List[Union[FeatureView, FeatureViewSlice]],
|
832
|
+
output_type: Literal["table"],
|
833
|
+
version: Optional[str] = None,
|
834
|
+
spine_timestamp_col: Optional[str] = None,
|
835
|
+
spine_label_cols: Optional[List[str]] = None,
|
836
|
+
exclude_columns: Optional[List[str]] = None,
|
837
|
+
include_feature_view_timestamp_col: bool = False,
|
838
|
+
desc: str = "",
|
839
|
+
) -> DataFrame:
|
840
|
+
...
|
841
|
+
|
842
|
+
@dispatch_decorator() # type: ignore[misc]
|
843
|
+
def generate_dataset(
|
844
|
+
self,
|
845
|
+
name: str,
|
846
|
+
spine_df: DataFrame,
|
847
|
+
features: List[Union[FeatureView, FeatureViewSlice]],
|
848
|
+
version: Optional[str] = None,
|
849
|
+
spine_timestamp_col: Optional[str] = None,
|
850
|
+
spine_label_cols: Optional[List[str]] = None,
|
851
|
+
exclude_columns: Optional[List[str]] = None,
|
852
|
+
include_feature_view_timestamp_col: bool = False,
|
853
|
+
desc: str = "",
|
854
|
+
output_type: Literal["dataset", "table"] = "dataset",
|
855
|
+
) -> Union[dataset.Dataset, DataFrame]:
|
818
856
|
"""
|
819
857
|
Generate dataset by given source table and feature views.
|
820
858
|
|
@@ -834,30 +872,29 @@ class FeatureStore:
|
|
834
872
|
include_feature_view_timestamp_col: Generated dataset will include timestamp column of feature view
|
835
873
|
(if feature view has timestamp column) if set true. Default to false.
|
836
874
|
desc: A description about this dataset.
|
875
|
+
output_type: The type of Snowflake storage to use for the generated training data.
|
837
876
|
|
838
877
|
Returns:
|
839
|
-
|
878
|
+
If output_type is "dataset" (default), returns a Dataset object.
|
879
|
+
If output_type is "table", returns a Snowpark DataFrame representing the table.
|
840
880
|
|
841
881
|
Raises:
|
842
|
-
SnowflakeMLException: [ValueError] spine_df contains more than one query.
|
843
882
|
SnowflakeMLException: [ValueError] Dataset name/version already exists
|
844
883
|
SnowflakeMLException: [ValueError] Snapshot creation failed.
|
884
|
+
SnowflakeMLException: [ValueError] Invalid output_type specified.
|
845
885
|
SnowflakeMLException: [RuntimeError] Failed to create clone from table.
|
846
886
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
847
887
|
"""
|
888
|
+
if output_type not in {"table", "dataset"}:
|
889
|
+
raise snowml_exceptions.SnowflakeMLException(
|
890
|
+
error_code=error_codes.INVALID_ARGUMENT,
|
891
|
+
original_exception=ValueError(f"Invalid output_type: {output_type}."),
|
892
|
+
)
|
848
893
|
if spine_timestamp_col is not None:
|
849
894
|
spine_timestamp_col = SqlIdentifier(spine_timestamp_col)
|
850
895
|
if spine_label_cols is not None:
|
851
896
|
spine_label_cols = to_sql_identifiers(spine_label_cols) # type: ignore[assignment]
|
852
897
|
|
853
|
-
if len(spine_df.queries["queries"]) != 1:
|
854
|
-
raise snowml_exceptions.SnowflakeMLException(
|
855
|
-
error_code=error_codes.INVALID_ARGUMENT,
|
856
|
-
original_exception=ValueError(
|
857
|
-
f"spine_df must contain only one query. Got: {spine_df.queries['queries']}"
|
858
|
-
),
|
859
|
-
)
|
860
|
-
|
861
898
|
result_df, join_keys = self._join_features(
|
862
899
|
spine_df, features, spine_timestamp_col, include_feature_view_timestamp_col
|
863
900
|
)
|
@@ -875,33 +912,49 @@ class FeatureStore:
|
|
875
912
|
result_df = self._exclude_columns(result_df, exclude_columns)
|
876
913
|
|
877
914
|
fs_meta = FeatureStoreMetadata(
|
878
|
-
spine_query=spine_df.queries["queries"][
|
915
|
+
spine_query=spine_df.queries["queries"][-1],
|
879
916
|
serialized_feature_views=[fv.to_json() for fv in features],
|
880
917
|
spine_timestamp_col=spine_timestamp_col,
|
881
918
|
)
|
882
919
|
|
883
920
|
try:
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
921
|
+
if output_type == "table":
|
922
|
+
table_name = f"{name}_{version}"
|
923
|
+
result_df.write.mode("errorifexists").save_as_table(table_name) # type: ignore[call-overload]
|
924
|
+
ds_df = self._session.table(table_name)
|
925
|
+
return ds_df
|
926
|
+
else:
|
927
|
+
assert output_type == "dataset"
|
928
|
+
if not self._is_dataset_enabled():
|
929
|
+
raise snowml_exceptions.SnowflakeMLException(
|
930
|
+
error_code=error_codes.SNOWML_CREATE_FAILED,
|
931
|
+
original_exception=RuntimeError(
|
932
|
+
"Dataset is not enabled in your account. Ask your account admin to set"
|
933
|
+
' FEATURE_DATASET=ENABLED or set output_type="table" to generate the data'
|
934
|
+
" as a Snowflake Table instead."
|
935
|
+
),
|
936
|
+
)
|
937
|
+
ds: dataset.Dataset = dataset.create_from_dataframe(
|
938
|
+
self._session,
|
939
|
+
name,
|
940
|
+
version,
|
941
|
+
input_dataframe=result_df,
|
942
|
+
exclude_cols=[spine_timestamp_col],
|
943
|
+
label_cols=spine_label_cols,
|
944
|
+
properties=fs_meta,
|
945
|
+
comment=desc,
|
946
|
+
)
|
947
|
+
return ds
|
895
948
|
|
896
949
|
except dataset_errors.DatasetExistError as e:
|
897
950
|
raise snowml_exceptions.SnowflakeMLException(
|
898
951
|
error_code=error_codes.OBJECT_ALREADY_EXISTS,
|
899
|
-
original_exception=
|
952
|
+
original_exception=RuntimeError(str(e)),
|
900
953
|
) from e
|
901
954
|
except SnowparkSQLException as e:
|
902
955
|
raise snowml_exceptions.SnowflakeMLException(
|
903
956
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
904
|
-
original_exception=RuntimeError(f"An error occurred during
|
957
|
+
original_exception=RuntimeError(f"An error occurred during dataset generation: {e}."),
|
905
958
|
) from e
|
906
959
|
|
907
960
|
@dispatch_decorator()
|
@@ -930,52 +983,47 @@ class FeatureStore:
|
|
930
983
|
return self._load_serialized_feature_objects(source_meta.properties.serialized_feature_views)
|
931
984
|
|
932
985
|
@dispatch_decorator()
|
933
|
-
def
|
986
|
+
def _clear(self, dryrun: bool = True) -> None:
|
934
987
|
"""
|
935
|
-
Clear all feature
|
936
|
-
|
988
|
+
Clear all feature views and entities. Note Feature Store schema and metadata will NOT be purged
|
989
|
+
together. Use SQL to delete schema and metadata instead.
|
937
990
|
|
938
|
-
|
939
|
-
|
991
|
+
Args:
|
992
|
+
dryrun: When true, print the list of objects that would be deleted without actually performing the deletion.
|
940
993
|
"""
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
994
|
+
warnings.warn(
|
995
|
+
"It will clear ALL feature views and entities in this Feature Store. Make sure your role"
|
996
|
+
" has sufficient access to all feature views and entities. Insufficient access to some feature"
|
997
|
+
" views or entities will leave Feature Store in an incomplete state.",
|
998
|
+
stacklevel=2,
|
999
|
+
category=UserWarning,
|
1000
|
+
)
|
1001
|
+
|
1002
|
+
all_fvs_df = self.list_feature_views()
|
1003
|
+
all_entities_df = self.list_entities()
|
1004
|
+
all_fvs_rows = all_fvs_df.collect()
|
1005
|
+
all_entities_rows = all_entities_df.collect()
|
1006
|
+
|
1007
|
+
if dryrun:
|
1008
|
+
logger.info(
|
1009
|
+
"Following feature views and entities will be deleted."
|
1010
|
+
+ " Set 'dryrun=False' to perform the actual deletion."
|
1011
|
+
)
|
1012
|
+
logger.info(f"Total {len(all_fvs_rows)} Feature views to be deleted:")
|
1013
|
+
all_fvs_df.show(n=len(all_fvs_rows))
|
1014
|
+
logger.info(f"\nTotal {len(all_entities_rows)} entities to be deleted:")
|
1015
|
+
all_entities_df.show(n=len(all_entities_rows))
|
1016
|
+
return
|
1017
|
+
|
1018
|
+
for fv_row in all_fvs_rows:
|
1019
|
+
fv = self.get_feature_view(
|
1020
|
+
SqlIdentifier(fv_row["NAME"], case_sensitive=True).identifier(), fv_row["VERSION"]
|
1021
|
+
)
|
1022
|
+
self.delete_feature_view(fv)
|
1023
|
+
|
1024
|
+
for entity_row in all_entities_rows:
|
1025
|
+
self.delete_entity(SqlIdentifier(entity_row["NAME"], case_sensitive=True).identifier())
|
973
1026
|
|
974
|
-
except Exception as e:
|
975
|
-
raise snowml_exceptions.SnowflakeMLException(
|
976
|
-
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
977
|
-
original_exception=RuntimeError(f"Failed to clear feature store {self._config.full_schema_path}: {e}."),
|
978
|
-
) from e
|
979
1027
|
logger.info(f"Feature store {self._config.full_schema_path} has been cleared.")
|
980
1028
|
|
981
1029
|
def _get_feature_view_if_exists(self, name: str, version: str) -> FeatureView:
|
@@ -1093,14 +1141,6 @@ class FeatureStore:
|
|
1093
1141
|
spine_timestamp_col: Optional[SqlIdentifier],
|
1094
1142
|
include_feature_view_timestamp_col: bool,
|
1095
1143
|
) -> Tuple[DataFrame, List[SqlIdentifier]]:
|
1096
|
-
if len(spine_df.queries["queries"]) != 1:
|
1097
|
-
raise snowml_exceptions.SnowflakeMLException(
|
1098
|
-
error_code=error_codes.INVALID_ARGUMENT,
|
1099
|
-
original_exception=ValueError(
|
1100
|
-
f"spine_df must contain only one query. Got: {spine_df.queries['queries']}"
|
1101
|
-
),
|
1102
|
-
)
|
1103
|
-
|
1104
1144
|
for f in features:
|
1105
1145
|
f = f.feature_view_ref if isinstance(f, FeatureViewSlice) else f
|
1106
1146
|
if f.status == FeatureViewStatus.DRAFT:
|
@@ -1122,7 +1162,7 @@ class FeatureStore:
|
|
1122
1162
|
self._asof_join_enabled = self._is_asof_join_enabled()
|
1123
1163
|
|
1124
1164
|
# TODO: leverage Snowpark dataframe for more concise syntax once it supports AsOfJoin
|
1125
|
-
query = spine_df.queries["queries"][
|
1165
|
+
query = spine_df.queries["queries"][-1]
|
1126
1166
|
layer = 0
|
1127
1167
|
for f in features:
|
1128
1168
|
if isinstance(f, FeatureViewSlice):
|
@@ -1180,7 +1220,15 @@ class FeatureStore:
|
|
1180
1220
|
"""
|
1181
1221
|
layer += 1
|
1182
1222
|
|
1183
|
-
|
1223
|
+
# TODO: construct result dataframe with dataframe APIs once ASOF join is supported natively.
|
1224
|
+
# The code below manually constructs the result dataframe from private members of the spine dataframe, which
|
1225
|
+
# likely will cause unintentional issues. This step is needed because spine_df might contain
|
1226
|
+
# prerequisite queries and post actions that must be carried over to result dataframe.
|
1227
|
+
result_df = self._session.sql(query)
|
1228
|
+
result_df._plan.queries = spine_df._plan.queries[:-1] + result_df._plan.queries
|
1229
|
+
result_df._plan.post_actions = spine_df._plan.post_actions
|
1230
|
+
|
1231
|
+
return result_df, join_keys
|
1184
1232
|
|
1185
1233
|
def _check_database_exists_or_throw(self) -> None:
|
1186
1234
|
resolved_db_name = self._config.database.resolved()
|
@@ -1517,6 +1565,9 @@ class FeatureStore:
|
|
1517
1565
|
original_exception=RuntimeError(f"Failed to parse query text for FeatureView {name}/{version}: {row}."),
|
1518
1566
|
)
|
1519
1567
|
|
1568
|
+
fv_name = FeatureView._get_physical_name(name, version)
|
1569
|
+
infer_schema_df = self._session.sql(f"SELECT * FROM {self._get_fully_qualified_name(fv_name)}")
|
1570
|
+
|
1520
1571
|
if m.group("obj_type") == "DYNAMIC TABLE":
|
1521
1572
|
query = m.group("query")
|
1522
1573
|
df = self._session.sql(query)
|
@@ -1524,7 +1575,7 @@ class FeatureStore:
|
|
1524
1575
|
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
1525
1576
|
entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
|
1526
1577
|
ts_col = fv_metadata.timestamp_col
|
1527
|
-
timestamp_col = ts_col if ts_col
|
1578
|
+
timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
|
1528
1579
|
|
1529
1580
|
fv = FeatureView._construct_feature_view(
|
1530
1581
|
name=name,
|
@@ -1534,9 +1585,7 @@ class FeatureStore:
|
|
1534
1585
|
desc=desc,
|
1535
1586
|
version=version,
|
1536
1587
|
status=FeatureViewStatus(row["scheduling_state"]),
|
1537
|
-
feature_descs=self._fetch_column_descs(
|
1538
|
-
"DYNAMIC TABLE", SqlIdentifier(row["name"], case_sensitive=True)
|
1539
|
-
),
|
1588
|
+
feature_descs=self._fetch_column_descs("DYNAMIC TABLE", fv_name),
|
1540
1589
|
refresh_freq=row["target_lag"],
|
1541
1590
|
database=self._config.database.identifier(),
|
1542
1591
|
schema=self._config.schema.identifier(),
|
@@ -1544,6 +1593,7 @@ class FeatureStore:
|
|
1544
1593
|
refresh_mode=row["refresh_mode"],
|
1545
1594
|
refresh_mode_reason=row["refresh_mode_reason"],
|
1546
1595
|
owner=row["owner"],
|
1596
|
+
infer_schema_df=infer_schema_df,
|
1547
1597
|
)
|
1548
1598
|
return fv
|
1549
1599
|
else:
|
@@ -1553,7 +1603,7 @@ class FeatureStore:
|
|
1553
1603
|
fv_metadata = _FeatureViewMetadata.from_json(m.group("fv_metadata"))
|
1554
1604
|
entities = [find_and_compose_entity(n) for n in fv_metadata.entities]
|
1555
1605
|
ts_col = fv_metadata.timestamp_col
|
1556
|
-
timestamp_col = ts_col if ts_col
|
1606
|
+
timestamp_col = ts_col if ts_col not in _LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS else None
|
1557
1607
|
|
1558
1608
|
fv = FeatureView._construct_feature_view(
|
1559
1609
|
name=name,
|
@@ -1563,7 +1613,7 @@ class FeatureStore:
|
|
1563
1613
|
desc=desc,
|
1564
1614
|
version=version,
|
1565
1615
|
status=FeatureViewStatus.STATIC,
|
1566
|
-
feature_descs=self._fetch_column_descs("VIEW",
|
1616
|
+
feature_descs=self._fetch_column_descs("VIEW", fv_name),
|
1567
1617
|
refresh_freq=None,
|
1568
1618
|
database=self._config.database.identifier(),
|
1569
1619
|
schema=self._config.schema.identifier(),
|
@@ -1571,6 +1621,7 @@ class FeatureStore:
|
|
1571
1621
|
refresh_mode=None,
|
1572
1622
|
refresh_mode_reason=None,
|
1573
1623
|
owner=row["owner"],
|
1624
|
+
infer_schema_df=infer_schema_df,
|
1574
1625
|
)
|
1575
1626
|
return fv
|
1576
1627
|
|
@@ -1710,7 +1761,7 @@ class FeatureStore:
|
|
1710
1761
|
self._session.sql(
|
1711
1762
|
f"""
|
1712
1763
|
SELECT * FROM TABLE(
|
1713
|
-
INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1764
|
+
{self._config.database}.INFORMATION_SCHEMA.TAG_REFERENCES_INTERNAL(
|
1714
1765
|
TAG_NAME => '{_FEATURE_STORE_OBJECT_TAG}'
|
1715
1766
|
)
|
1716
1767
|
) LIMIT 1;
|
@@ -1720,6 +1771,15 @@ class FeatureStore:
|
|
1720
1771
|
except Exception:
|
1721
1772
|
return False
|
1722
1773
|
|
1774
|
+
def _is_dataset_enabled(self) -> bool:
|
1775
|
+
try:
|
1776
|
+
self._session.sql(f"SHOW DATASETS IN SCHEMA {self._config.full_schema_path}").collect()
|
1777
|
+
return True
|
1778
|
+
except SnowparkSQLException as e:
|
1779
|
+
if "'DATASETS' does not exist" in e.message:
|
1780
|
+
return False
|
1781
|
+
raise
|
1782
|
+
|
1723
1783
|
def _check_feature_store_object_versions(self) -> None:
|
1724
1784
|
versions = self._collapse_object_versions()
|
1725
1785
|
if len(versions) > 0 and pkg_version.parse(snowml_version.VERSION) < versions[0]:
|
@@ -5,12 +5,8 @@ import re
|
|
5
5
|
from collections import OrderedDict
|
6
6
|
from dataclasses import asdict, dataclass
|
7
7
|
from enum import Enum
|
8
|
-
from typing import Dict, List, Optional
|
8
|
+
from typing import Any, Dict, List, Optional
|
9
9
|
|
10
|
-
from snowflake.ml._internal.exceptions import (
|
11
|
-
error_codes,
|
12
|
-
exceptions as snowml_exceptions,
|
13
|
-
)
|
14
10
|
from snowflake.ml._internal.utils.identifier import concat_names
|
15
11
|
from snowflake.ml._internal.utils.sql_identifier import (
|
16
12
|
SqlIdentifier,
|
@@ -27,12 +23,18 @@ from snowflake.snowpark.types import (
|
|
27
23
|
)
|
28
24
|
|
29
25
|
_FEATURE_VIEW_NAME_DELIMITER = "$"
|
30
|
-
|
26
|
+
_LEGACY_TIMESTAMP_COL_PLACEHOLDER_VALS = ["FS_TIMESTAMP_COL_PLACEHOLDER_VAL", "NULL"]
|
27
|
+
_TIMESTAMP_COL_PLACEHOLDER = "NULL"
|
31
28
|
_FEATURE_OBJ_TYPE = "FEATURE_OBJ_TYPE"
|
32
29
|
# Feature view version rule is aligned with dataset version rule in SQL.
|
33
30
|
_FEATURE_VIEW_VERSION_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_.\-]*$")
|
34
31
|
_FEATURE_VIEW_VERSION_MAX_LENGTH = 128
|
35
32
|
|
33
|
+
_RESULT_SCAN_QUERY_PATTERN = re.compile(
|
34
|
+
r".*FROM\s*TABLE\s*\(\s*RESULT_SCAN\s*\(.*",
|
35
|
+
flags=re.DOTALL | re.IGNORECASE | re.X,
|
36
|
+
)
|
37
|
+
|
36
38
|
|
37
39
|
@dataclass(frozen=True)
|
38
40
|
class _FeatureViewMetadata:
|
@@ -53,13 +55,10 @@ class _FeatureViewMetadata:
|
|
53
55
|
class FeatureViewVersion(str):
|
54
56
|
def __new__(cls, version: str) -> FeatureViewVersion:
|
55
57
|
if not _FEATURE_VIEW_VERSION_RE.match(version) or len(version) > _FEATURE_VIEW_VERSION_MAX_LENGTH:
|
56
|
-
raise
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
"It must start with letter or digit, and followed by letter, digit, '_', '-' or '.'. "
|
61
|
-
f"The length limit is {_FEATURE_VIEW_VERSION_MAX_LENGTH}."
|
62
|
-
),
|
58
|
+
raise ValueError(
|
59
|
+
f"`{version}` is not a valid feature view version. "
|
60
|
+
"It must start with letter or digit, and followed by letter, digit, '_', '-' or '.'. "
|
61
|
+
f"The length limit is {_FEATURE_VIEW_VERSION_MAX_LENGTH}."
|
63
62
|
)
|
64
63
|
return super().__new__(cls, version)
|
65
64
|
|
@@ -121,12 +120,13 @@ class FeatureView:
|
|
121
120
|
timestamp_col: Optional[str] = None,
|
122
121
|
refresh_freq: Optional[str] = None,
|
123
122
|
desc: str = "",
|
123
|
+
**_kwargs: Any,
|
124
124
|
) -> None:
|
125
125
|
"""
|
126
126
|
Create a FeatureView instance.
|
127
127
|
|
128
128
|
Args:
|
129
|
-
name: name of the FeatureView. NOTE:
|
129
|
+
name: name of the FeatureView. NOTE: following Snowflake identifier rule
|
130
130
|
entities: entities that the FeatureView is associated with.
|
131
131
|
feature_df: Snowpark DataFrame containing data source and all feature feature_df logics.
|
132
132
|
Final projection of the DataFrame should contain feature names, join keys and timestamp(if applicable).
|
@@ -140,6 +140,7 @@ class FeatureView:
|
|
140
140
|
NOTE: If refresh_freq is not provided, then FeatureView will be registered as View on Snowflake backend
|
141
141
|
and there won't be extra storage cost.
|
142
142
|
desc: description of the FeatureView.
|
143
|
+
_kwargs: reserved kwargs for system generated args. NOTE: DO NOT USE.
|
143
144
|
"""
|
144
145
|
|
145
146
|
self._name: SqlIdentifier = SqlIdentifier(name)
|
@@ -149,6 +150,7 @@ class FeatureView:
|
|
149
150
|
SqlIdentifier(timestamp_col) if timestamp_col is not None else None
|
150
151
|
)
|
151
152
|
self._desc: str = desc
|
153
|
+
self._infer_schema_df: DataFrame = _kwargs.get("_infer_schema_df", self._feature_df)
|
152
154
|
self._query: str = self._get_query()
|
153
155
|
self._version: Optional[FeatureViewVersion] = None
|
154
156
|
self._status: FeatureViewStatus = FeatureViewStatus.DRAFT
|
@@ -295,7 +297,7 @@ class FeatureView:
|
|
295
297
|
|
296
298
|
@property
|
297
299
|
def output_schema(self) -> StructType:
|
298
|
-
return self.
|
300
|
+
return self._infer_schema_df.schema
|
299
301
|
|
300
302
|
@property
|
301
303
|
def refresh_mode(self) -> Optional[str]:
|
@@ -329,7 +331,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
329
331
|
f"FeatureView name `{self._name}` contains invalid character `{_FEATURE_VIEW_NAME_DELIMITER}`."
|
330
332
|
)
|
331
333
|
|
332
|
-
unescaped_df_cols = to_sql_identifiers(self.
|
334
|
+
unescaped_df_cols = to_sql_identifiers(self._infer_schema_df.columns)
|
333
335
|
for e in self._entities:
|
334
336
|
for k in e.join_keys:
|
335
337
|
if k not in unescaped_df_cols:
|
@@ -341,17 +343,20 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
341
343
|
ts_col = self._timestamp_col
|
342
344
|
if ts_col == SqlIdentifier(_TIMESTAMP_COL_PLACEHOLDER):
|
343
345
|
raise ValueError(f"Invalid timestamp_col name, cannot be {_TIMESTAMP_COL_PLACEHOLDER}.")
|
344
|
-
if ts_col not in to_sql_identifiers(self.
|
346
|
+
if ts_col not in to_sql_identifiers(self._infer_schema_df.columns):
|
345
347
|
raise ValueError(f"timestamp_col {ts_col} is not found in input dataframe.")
|
346
348
|
|
347
|
-
col_type = self.
|
349
|
+
col_type = self._infer_schema_df.schema[ts_col].datatype
|
348
350
|
if not isinstance(col_type, (DateType, TimeType, TimestampType, _NumericType)):
|
349
351
|
raise ValueError(f"Invalid data type for timestamp_col {ts_col}: {col_type}.")
|
350
352
|
|
353
|
+
if re.match(_RESULT_SCAN_QUERY_PATTERN, self._query) is not None:
|
354
|
+
raise ValueError(f"feature_df should not be reading from RESULT_SCAN. Invalid query: {self._query}")
|
355
|
+
|
351
356
|
def _get_feature_names(self) -> List[SqlIdentifier]:
|
352
357
|
join_keys = [k for e in self._entities for k in e.join_keys]
|
353
358
|
ts_col = [self._timestamp_col] if self._timestamp_col is not None else []
|
354
|
-
feature_names = to_sql_identifiers(self.
|
359
|
+
feature_names = to_sql_identifiers(self._infer_schema_df.columns, case_sensitive=False)
|
355
360
|
return [c for c in feature_names if c not in join_keys + ts_col]
|
356
361
|
|
357
362
|
def __repr__(self) -> str:
|
@@ -384,6 +389,9 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
384
389
|
fv_dict = self.__dict__.copy()
|
385
390
|
if "_feature_df" in fv_dict:
|
386
391
|
fv_dict.pop("_feature_df")
|
392
|
+
if "_infer_schema_df" in fv_dict:
|
393
|
+
infer_schema_df = fv_dict.pop("_infer_schema_df")
|
394
|
+
fv_dict["_infer_schema_query"] = infer_schema_df.queries["queries"][0]
|
387
395
|
fv_dict["_entities"] = [e._to_dict() for e in self._entities]
|
388
396
|
fv_dict["_status"] = str(self._status)
|
389
397
|
fv_dict["_name"] = str(self._name) if self._name is not None else None
|
@@ -440,6 +448,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
440
448
|
refresh_mode=json_dict["_refresh_mode"],
|
441
449
|
refresh_mode_reason=json_dict["_refresh_mode_reason"],
|
442
450
|
owner=json_dict["_owner"],
|
451
|
+
infer_schema_df=session.sql(json_dict.get("_infer_schema_query", None)),
|
443
452
|
)
|
444
453
|
|
445
454
|
@staticmethod
|
@@ -471,6 +480,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
471
480
|
refresh_mode: Optional[str],
|
472
481
|
refresh_mode_reason: Optional[str],
|
473
482
|
owner: Optional[str],
|
483
|
+
infer_schema_df: Optional[DataFrame],
|
474
484
|
) -> FeatureView:
|
475
485
|
fv = FeatureView(
|
476
486
|
name=name,
|
@@ -478,6 +488,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
478
488
|
feature_df=feature_df,
|
479
489
|
timestamp_col=timestamp_col,
|
480
490
|
desc=desc,
|
491
|
+
_infer_schema_df=infer_schema_df,
|
481
492
|
)
|
482
493
|
fv._version = FeatureViewVersion(version) if version is not None else None
|
483
494
|
fv._status = status
|
@@ -78,22 +78,26 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
|
|
78
78
|
match = _SNOWURL_PATH_RE.fullmatch(file)
|
79
79
|
assert match is not None and match.group("filepath") is not None
|
80
80
|
versions_dict[match.group("version")].append(match.group("filepath"))
|
81
|
-
presigned_urls: List[Tuple[str, str]] = []
|
82
81
|
try:
|
82
|
+
async_jobs: List[snowpark.AsyncJob] = []
|
83
83
|
for version, version_files in versions_dict.items():
|
84
84
|
for file in version_files:
|
85
85
|
stage_loc = f"{self.stage_name}/versions/{version}"
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
)
|
86
|
+
query_result = self._session.sql(
|
87
|
+
f"select '{version}/{file}' as name,"
|
88
|
+
f" get_presigned_url('{stage_loc}', '{file}', {url_lifetime}) as url"
|
89
|
+
).collect(
|
90
|
+
block=False,
|
91
|
+
statement_params=telemetry.get_function_usage_statement_params(
|
92
|
+
project=stage_fs._PROJECT,
|
93
|
+
api_calls=[snowpark.DataFrame.collect],
|
94
|
+
),
|
96
95
|
)
|
96
|
+
async_jobs.append(query_result)
|
97
|
+
presigned_urls: List[Tuple[str, str]] = [
|
98
|
+
(r["NAME"], r["URL"]) for job in async_jobs for r in stage_fs._resolve_async_job(job)
|
99
|
+
]
|
100
|
+
return presigned_urls
|
97
101
|
except snowpark_exceptions.SnowparkClientException as e:
|
98
102
|
if e.message.startswith(fileset_errors.ERRNO_DOMAIN_NOT_EXIST) or e.message.startswith(
|
99
103
|
fileset_errors.ERRNO_STAGE_NOT_EXIST
|
@@ -109,7 +113,6 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
|
|
109
113
|
error_code=error_codes.INTERNAL_SNOWML_ERROR,
|
110
114
|
original_exception=fileset_errors.FileSetError(str(e)),
|
111
115
|
)
|
112
|
-
return presigned_urls
|
113
116
|
|
114
117
|
@classmethod
|
115
118
|
def _parent(cls, path: str) -> str:
|