snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
3
3
|
import json
|
4
4
|
import re
|
5
5
|
from collections import OrderedDict
|
6
|
-
from dataclasses import dataclass
|
6
|
+
from dataclasses import asdict, dataclass
|
7
7
|
from enum import Enum
|
8
8
|
from typing import Dict, List, Optional
|
9
9
|
|
@@ -29,22 +29,42 @@ from snowflake.snowpark.types import (
|
|
29
29
|
_FEATURE_VIEW_NAME_DELIMITER = "$"
|
30
30
|
_TIMESTAMP_COL_PLACEHOLDER = "FS_TIMESTAMP_COL_PLACEHOLDER_VAL"
|
31
31
|
_FEATURE_OBJ_TYPE = "FEATURE_OBJ_TYPE"
|
32
|
-
|
32
|
+
# Feature view version rule is aligned with dataset version rule in SQL.
|
33
|
+
_FEATURE_VIEW_VERSION_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_.\-]*$")
|
34
|
+
_FEATURE_VIEW_VERSION_MAX_LENGTH = 128
|
35
|
+
|
36
|
+
|
37
|
+
@dataclass(frozen=True)
|
38
|
+
class _FeatureViewMetadata:
|
39
|
+
"""Represent metadata tracked on top of FV backend object"""
|
40
|
+
|
41
|
+
entities: List[str]
|
42
|
+
timestamp_col: str
|
43
|
+
|
44
|
+
def to_json(self) -> str:
|
45
|
+
return json.dumps(asdict(self))
|
46
|
+
|
47
|
+
@classmethod
|
48
|
+
def from_json(cls, json_str: str) -> _FeatureViewMetadata:
|
49
|
+
state_dict = json.loads(json_str)
|
50
|
+
return cls(**state_dict)
|
33
51
|
|
34
52
|
|
35
53
|
class FeatureViewVersion(str):
|
36
54
|
def __new__(cls, version: str) -> FeatureViewVersion:
|
37
|
-
if not _FEATURE_VIEW_VERSION_RE.match(version):
|
55
|
+
if not _FEATURE_VIEW_VERSION_RE.match(version) or len(version) > _FEATURE_VIEW_VERSION_MAX_LENGTH:
|
38
56
|
raise snowml_exceptions.SnowflakeMLException(
|
39
57
|
error_code=error_codes.INVALID_ARGUMENT,
|
40
58
|
original_exception=ValueError(
|
41
|
-
f"`{version}` is not a valid feature view version.
|
59
|
+
f"`{version}` is not a valid feature view version. "
|
60
|
+
"It must start with letter or digit, and followed by letter, digit, '_', '-' or '.'. "
|
61
|
+
f"The length limit is {_FEATURE_VIEW_VERSION_MAX_LENGTH}."
|
42
62
|
),
|
43
63
|
)
|
44
|
-
return super().__new__(cls, version
|
64
|
+
return super().__new__(cls, version)
|
45
65
|
|
46
66
|
def __init__(self, version: str) -> None:
|
47
|
-
|
67
|
+
super().__init__()
|
48
68
|
|
49
69
|
|
50
70
|
class FeatureViewStatus(Enum):
|
@@ -164,27 +184,19 @@ class FeatureView:
|
|
164
184
|
res.append(name)
|
165
185
|
return FeatureViewSlice(self, res)
|
166
186
|
|
167
|
-
def physical_name(self) -> SqlIdentifier:
|
168
|
-
"""Returns the physical name for this feature in Snowflake.
|
169
|
-
|
170
|
-
Returns:
|
171
|
-
Physical name string.
|
172
|
-
|
173
|
-
Raises:
|
174
|
-
RuntimeError: if the FeatureView is not materialized.
|
175
|
-
"""
|
176
|
-
if self.status == FeatureViewStatus.DRAFT or self.version is None:
|
177
|
-
raise RuntimeError(f"FeatureView {self.name} has not been materialized.")
|
178
|
-
return FeatureView._get_physical_name(self.name, self.version)
|
179
|
-
|
180
187
|
def fully_qualified_name(self) -> str:
|
181
188
|
"""Returns the fully qualified name (<database_name>.<schema_name>.<feature_view_name>) for the
|
182
189
|
FeatureView in Snowflake.
|
183
190
|
|
184
191
|
Returns:
|
185
192
|
fully qualified name string.
|
193
|
+
|
194
|
+
Raises:
|
195
|
+
RuntimeError: if the FeatureView is not registered.
|
186
196
|
"""
|
187
|
-
|
197
|
+
if self.status == FeatureViewStatus.DRAFT or self.version is None:
|
198
|
+
raise RuntimeError(f"FeatureView {self.name} has not been registered.")
|
199
|
+
return f"{self._database}.{self._schema}.{FeatureView._get_physical_name(self.name, self.version)}"
|
188
200
|
|
189
201
|
def attach_feature_desc(self, descs: Dict[str, str]) -> FeatureView:
|
190
202
|
"""
|
@@ -297,6 +309,11 @@ class FeatureView:
|
|
297
309
|
def owner(self) -> Optional[str]:
|
298
310
|
return self._owner
|
299
311
|
|
312
|
+
def _metadata(self) -> _FeatureViewMetadata:
|
313
|
+
entity_names = [e.name.identifier() for e in self.entities]
|
314
|
+
ts_col = self.timestamp_col.identifier() if self.timestamp_col is not None else _TIMESTAMP_COL_PLACEHOLDER
|
315
|
+
return _FeatureViewMetadata(entity_names, ts_col)
|
316
|
+
|
300
317
|
def _get_query(self) -> str:
|
301
318
|
if len(self._feature_df.queries["queries"]) != 1:
|
302
319
|
raise ValueError(
|
@@ -386,7 +403,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
386
403
|
def to_df(self, session: Session) -> DataFrame:
|
387
404
|
values = list(self._to_dict().values())
|
388
405
|
schema = [x.lstrip("_") for x in list(self._to_dict().keys())]
|
389
|
-
values.append(str(self.
|
406
|
+
values.append(str(FeatureView._get_physical_name(self._name, self._version))) # type: ignore[arg-type]
|
390
407
|
schema.append("physical_name")
|
391
408
|
return session.create_dataframe([values], schema=schema)
|
392
409
|
|
@@ -448,8 +465,8 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
448
465
|
status: FeatureViewStatus,
|
449
466
|
feature_descs: Dict[str, str],
|
450
467
|
refresh_freq: Optional[str],
|
451
|
-
database:
|
452
|
-
schema:
|
468
|
+
database: str,
|
469
|
+
schema: str,
|
453
470
|
warehouse: Optional[str],
|
454
471
|
refresh_mode: Optional[str],
|
455
472
|
refresh_mode_reason: Optional[str],
|
@@ -0,0 +1,146 @@
|
|
1
|
+
import re
|
2
|
+
from collections import defaultdict
|
3
|
+
from typing import Any, List, Optional, Tuple
|
4
|
+
|
5
|
+
from snowflake import snowpark
|
6
|
+
from snowflake.connector import connection
|
7
|
+
from snowflake.ml._internal import telemetry
|
8
|
+
from snowflake.ml._internal.exceptions import (
|
9
|
+
error_codes,
|
10
|
+
exceptions as snowml_exceptions,
|
11
|
+
fileset_errors,
|
12
|
+
)
|
13
|
+
from snowflake.ml._internal.utils import identifier
|
14
|
+
from snowflake.snowpark import exceptions as snowpark_exceptions
|
15
|
+
|
16
|
+
from . import stage_fs
|
17
|
+
|
18
|
+
_SNOWURL_PATH_RE = re.compile(r"versions/(?P<version>[^/]+)(?:/+(?P<filepath>.*))?")
|
19
|
+
|
20
|
+
|
21
|
+
class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
|
22
|
+
def __init__(
|
23
|
+
self,
|
24
|
+
*,
|
25
|
+
domain: str,
|
26
|
+
name: str,
|
27
|
+
snowpark_session: Optional[snowpark.Session] = None,
|
28
|
+
sf_connection: Optional[connection.SnowflakeConnection] = None,
|
29
|
+
**kwargs: Any,
|
30
|
+
) -> None:
|
31
|
+
|
32
|
+
(db, schema, object_name, _) = identifier.parse_schema_level_object_identifier(name)
|
33
|
+
self._name = name # TODO: Require or resolve FQN
|
34
|
+
self._domain = domain
|
35
|
+
|
36
|
+
super().__init__(
|
37
|
+
db=db,
|
38
|
+
schema=schema,
|
39
|
+
stage=object_name,
|
40
|
+
snowpark_session=snowpark_session,
|
41
|
+
sf_connection=sf_connection,
|
42
|
+
**kwargs,
|
43
|
+
)
|
44
|
+
|
45
|
+
@property
|
46
|
+
def stage_name(self) -> str:
|
47
|
+
"""Get the Snowflake path to this stage.
|
48
|
+
|
49
|
+
Returns:
|
50
|
+
A string in the format of snow://<domain>/<name>
|
51
|
+
Example: snow://dataset/my_dataset
|
52
|
+
|
53
|
+
# noqa: DAR203
|
54
|
+
"""
|
55
|
+
return f"snow://{self._domain}/{self._name}"
|
56
|
+
|
57
|
+
def _stage_path_to_relative_path(self, stage_path: str) -> str:
|
58
|
+
"""Convert a stage file path which comes from the LIST query to a relative file path in that stage.
|
59
|
+
|
60
|
+
The file path returned by LIST query always has the format "versions/<version>/<relative_file_path>".
|
61
|
+
The full "versions/<version>/<relative_file_path>" is returned unchanged.
|
62
|
+
|
63
|
+
Args:
|
64
|
+
stage_path: A string started with the name of the stage.
|
65
|
+
|
66
|
+
Returns:
|
67
|
+
A string of the relative stage path.
|
68
|
+
"""
|
69
|
+
return stage_path
|
70
|
+
|
71
|
+
def _fetch_presigned_urls(
|
72
|
+
self, files: List[str], url_lifetime: float = stage_fs._PRESIGNED_URL_LIFETIME_SEC
|
73
|
+
) -> List[Tuple[str, str]]:
|
74
|
+
"""Fetch presigned urls for the given files."""
|
75
|
+
# SnowURL requires full snow://<domain>/<entity>/versions/<version> as the stage path arg to get_presigned_url
|
76
|
+
versions_dict = defaultdict(list)
|
77
|
+
for file in files:
|
78
|
+
match = _SNOWURL_PATH_RE.fullmatch(file)
|
79
|
+
assert match is not None and match.group("filepath") is not None
|
80
|
+
versions_dict[match.group("version")].append(match.group("filepath"))
|
81
|
+
presigned_urls: List[Tuple[str, str]] = []
|
82
|
+
try:
|
83
|
+
for version, version_files in versions_dict.items():
|
84
|
+
for file in version_files:
|
85
|
+
stage_loc = f"{self.stage_name}/versions/{version}"
|
86
|
+
presigned_urls.extend(
|
87
|
+
self._session.sql(
|
88
|
+
f"select '{version}/{file}' as name,"
|
89
|
+
f" get_presigned_url('{stage_loc}', '{file}', {url_lifetime}) as url"
|
90
|
+
).collect(
|
91
|
+
statement_params=telemetry.get_function_usage_statement_params(
|
92
|
+
project=stage_fs._PROJECT,
|
93
|
+
api_calls=[snowpark.DataFrame.collect],
|
94
|
+
),
|
95
|
+
)
|
96
|
+
)
|
97
|
+
except snowpark_exceptions.SnowparkClientException as e:
|
98
|
+
if e.message.startswith(fileset_errors.ERRNO_DOMAIN_NOT_EXIST) or e.message.startswith(
|
99
|
+
fileset_errors.ERRNO_STAGE_NOT_EXIST
|
100
|
+
):
|
101
|
+
raise snowml_exceptions.SnowflakeMLException(
|
102
|
+
error_code=error_codes.SNOWML_NOT_FOUND,
|
103
|
+
original_exception=fileset_errors.StageNotFoundError(
|
104
|
+
f"Stage {self.stage_name} does not exist or is not authorized."
|
105
|
+
),
|
106
|
+
)
|
107
|
+
else:
|
108
|
+
raise snowml_exceptions.SnowflakeMLException(
|
109
|
+
error_code=error_codes.INTERNAL_SNOWML_ERROR,
|
110
|
+
original_exception=fileset_errors.FileSetError(str(e)),
|
111
|
+
)
|
112
|
+
return presigned_urls
|
113
|
+
|
114
|
+
@classmethod
|
115
|
+
def _parent(cls, path: str) -> str:
|
116
|
+
"""Get parent of specified path up to minimally valid root path.
|
117
|
+
|
118
|
+
For SnowURL, the minimum valid path is snow://<domain>/<entity>/versions/<version>
|
119
|
+
|
120
|
+
Args:
|
121
|
+
path: File or directory path
|
122
|
+
|
123
|
+
Returns:
|
124
|
+
Parent path
|
125
|
+
|
126
|
+
Examples:
|
127
|
+
----
|
128
|
+
>>> fs._parent("snow://dataset/my_ds/versions/my_version/file.ext")
|
129
|
+
"snow://dataset/my_ds/versions/my_version/"
|
130
|
+
>>> fs._parent("snow://dataset/my_ds/versions/my_version/subdir/file.ext")
|
131
|
+
"snow://dataset/my_ds/versions/my_version/subdir/"
|
132
|
+
>>> fs._parent("snow://dataset/my_ds/versions/my_version/")
|
133
|
+
"snow://dataset/my_ds/versions/my_version/"
|
134
|
+
>>> fs._parent("snow://dataset/my_ds/versions/my_version")
|
135
|
+
"snow://dataset/my_ds/versions/my_version"
|
136
|
+
"""
|
137
|
+
path_match = _SNOWURL_PATH_RE.fullmatch(path)
|
138
|
+
if not path_match:
|
139
|
+
return super()._parent(path) # type: ignore[no-any-return]
|
140
|
+
filepath: str = path_match.group("filepath") or ""
|
141
|
+
root: str = path[: path_match.start("filepath")] if filepath else path
|
142
|
+
if "/" in filepath:
|
143
|
+
parent = filepath.rsplit("/", 1)[0]
|
144
|
+
return root + parent
|
145
|
+
else:
|
146
|
+
return root
|
snowflake/ml/fileset/sfcfs.py
CHANGED
@@ -60,6 +60,8 @@ class SFFileSystem(fsspec.AbstractFileSystem):
|
|
60
60
|
b'2014-02-05 14:35:00.00000054,13,2014-02-05 14:35:00 UTC,-74.00688,40.73049,-74.00563,40.70676,2\n'
|
61
61
|
"""
|
62
62
|
|
63
|
+
protocol = PROTOCOL_NAME
|
64
|
+
|
63
65
|
def __init__(
|
64
66
|
self,
|
65
67
|
sf_connection: Optional[connection.SnowflakeConnection] = None,
|
@@ -183,7 +185,6 @@ class SFFileSystem(fsspec.AbstractFileSystem):
|
|
183
185
|
func_params_to_log=["detail"],
|
184
186
|
conn_attr_name="_conn",
|
185
187
|
)
|
186
|
-
@snowpark._internal.utils.private_preview(version="0.2.0")
|
187
188
|
def ls(self, path: str, detail: bool = False, **kwargs: Any) -> Union[List[str], List[Dict[str, Any]]]:
|
188
189
|
"""Override fsspec `ls` method. List single "directory" with or without details.
|
189
190
|
|
@@ -204,7 +205,7 @@ class SFFileSystem(fsspec.AbstractFileSystem):
|
|
204
205
|
>>> sffs.ls("@MYDB.public.FOO/nytrain/")
|
205
206
|
['@MYDB.public.FOO/nytrain/data_0_0_0.csv', '@MYDB.public.FOO/nytrain/data_0_0_1.csv']
|
206
207
|
"""
|
207
|
-
file_path =
|
208
|
+
file_path = self._parse_file_path(path)
|
208
209
|
stage_fs = self._get_stage_fs(file_path)
|
209
210
|
stage_path_list = stage_fs.ls(file_path.filepath, detail=True, **kwargs)
|
210
211
|
stage_path_list = cast(List[Dict[str, Any]], stage_path_list)
|
@@ -214,7 +215,6 @@ class SFFileSystem(fsspec.AbstractFileSystem):
|
|
214
215
|
project=_PROJECT,
|
215
216
|
conn_attr_name="_conn",
|
216
217
|
)
|
217
|
-
@snowpark._internal.utils.private_preview(version="0.2.0")
|
218
218
|
def optimize_read(self, files: Optional[List[str]] = None) -> None:
|
219
219
|
"""Prefetch and cache the presigned urls for all the given files to speed up the file opening.
|
220
220
|
|
@@ -226,19 +226,20 @@ class SFFileSystem(fsspec.AbstractFileSystem):
|
|
226
226
|
"""
|
227
227
|
if not files:
|
228
228
|
return
|
229
|
-
|
229
|
+
stage_fs_dict: Dict[str, stage_fs.SFStageFileSystem] = {}
|
230
|
+
stage_file_paths: Dict[str, List[str]] = collections.defaultdict(list)
|
230
231
|
for file in files:
|
231
|
-
|
232
|
-
|
232
|
+
path_info = self._parse_file_path(file)
|
233
|
+
fs = self._get_stage_fs(path_info)
|
234
|
+
stage_fs_dict[fs.stage_name] = fs
|
235
|
+
stage_file_paths[fs.stage_name].append(path_info.filepath)
|
233
236
|
for k, v in stage_file_paths.items():
|
234
|
-
|
235
|
-
stage_fs.optimize_read(v)
|
237
|
+
stage_fs_dict[k].optimize_read(v)
|
236
238
|
|
237
239
|
@telemetry.send_api_usage_telemetry(
|
238
240
|
project=_PROJECT,
|
239
241
|
conn_attr_name="_conn",
|
240
242
|
)
|
241
|
-
@snowpark._internal.utils.private_preview(version="0.2.0")
|
242
243
|
def _open(self, path: str, **kwargs: Any) -> fsspec.spec.AbstractBufferedFile:
|
243
244
|
"""Override fsspec `_open` method. Open a file for reading in 'rb' mode.
|
244
245
|
|
@@ -256,7 +257,7 @@ class SFFileSystem(fsspec.AbstractFileSystem):
|
|
256
257
|
Returns:
|
257
258
|
A fsspec AbstractBufferedFile which supports python file operations.
|
258
259
|
"""
|
259
|
-
file_path =
|
260
|
+
file_path = self._parse_file_path(path)
|
260
261
|
stage_fs = self._get_stage_fs(file_path)
|
261
262
|
return stage_fs._open(file_path.filepath, **kwargs)
|
262
263
|
|
@@ -264,10 +265,9 @@ class SFFileSystem(fsspec.AbstractFileSystem):
|
|
264
265
|
project=_PROJECT,
|
265
266
|
conn_attr_name="_conn",
|
266
267
|
)
|
267
|
-
@snowpark._internal.utils.private_preview(version="0.2.0")
|
268
268
|
def info(self, path: str, **kwargs: Any) -> Dict[str, Any]:
|
269
269
|
"""Override fsspec `info` method. Give details of entry at path."""
|
270
|
-
file_path =
|
270
|
+
file_path = self._parse_file_path(path)
|
271
271
|
stage_fs = self._get_stage_fs(file_path)
|
272
272
|
res: Dict[str, Any] = stage_fs.info(file_path.filepath, **kwargs)
|
273
273
|
if res:
|
@@ -292,52 +292,54 @@ class SFFileSystem(fsspec.AbstractFileSystem):
|
|
292
292
|
"""Convert the relative path in a stage to an absolute path starts with the location of the stage."""
|
293
293
|
return stage_fs.stage_name + "/" + path
|
294
294
|
|
295
|
+
@classmethod
|
296
|
+
def _parse_file_path(cls, path: str) -> _SFFilePath:
|
297
|
+
"""Parse a snowflake location path.
|
295
298
|
|
296
|
-
|
297
|
-
|
299
|
+
The following properties will be extracted from the path input:
|
300
|
+
- database
|
301
|
+
- schema
|
302
|
+
- stage
|
303
|
+
- path (optional)
|
298
304
|
|
299
|
-
|
300
|
-
|
305
|
+
Args:
|
306
|
+
path: A string in the format of "@{database}.{schema}.{stage}/{path}".
|
301
307
|
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
308
|
+
Example:
|
309
|
+
"@my_db.my_schema.my_stage/"
|
310
|
+
"@my_db.my_schema.my_stage/file1"
|
311
|
+
"@my_db.my_schema.my_stage/dir1/"
|
312
|
+
"@my_db.my_schema.my_stage/dir1/file2"
|
307
313
|
|
308
|
-
|
309
|
-
|
314
|
+
Returns:
|
315
|
+
A namedtuple consisting of the database name, schema name, stage name and path.
|
310
316
|
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
317
|
+
Raises:
|
318
|
+
SnowflakeMLException: An error occurred when invalid path is given.
|
319
|
+
"""
|
320
|
+
sfc_prefix = f"{PROTOCOL_NAME}://"
|
321
|
+
if path.startswith(sfc_prefix):
|
322
|
+
path = path[len(sfc_prefix) :]
|
323
|
+
if not path.startswith("@"):
|
324
|
+
raise snowml_exceptions.SnowflakeMLException(
|
325
|
+
error_code=error_codes.SNOWML_INVALID_STAGE,
|
326
|
+
original_exception=ValueError(
|
327
|
+
'Invalid path. Expected path to start with "@". Example: @database.schema.stage/optional_path.'
|
328
|
+
),
|
329
|
+
)
|
330
|
+
try:
|
331
|
+
res = identifier.parse_schema_level_object_identifier(path[1:])
|
332
|
+
if res[1] is None or res[0] is None or (res[3] and not res[3].startswith("/")):
|
333
|
+
raise ValueError("Invalid path. Missing database or schema identifier.")
|
334
|
+
logging.debug(f"Parsed path: {res}")
|
335
|
+
return _SFFilePath(res[0], res[1], res[2], res[3][1:])
|
336
|
+
except ValueError:
|
337
|
+
raise snowml_exceptions.SnowflakeMLException(
|
338
|
+
error_code=error_codes.SNOWML_INVALID_STAGE,
|
339
|
+
original_exception=ValueError(
|
340
|
+
f"Invalid path. Expected format: @database.schema.stage/optional_path. Getting {path}"
|
341
|
+
),
|
342
|
+
)
|
316
343
|
|
317
|
-
Returns:
|
318
|
-
A namedtuple consists of database name, schema name, stage name and path.
|
319
344
|
|
320
|
-
|
321
|
-
SnowflakeMLException: An error occurred when invalid path is given.
|
322
|
-
"""
|
323
|
-
sfc_prefix = f"{PROTOCOL_NAME}://"
|
324
|
-
if path.startswith(sfc_prefix):
|
325
|
-
path = path[len(sfc_prefix) :]
|
326
|
-
if not path.startswith("@"):
|
327
|
-
raise snowml_exceptions.SnowflakeMLException(
|
328
|
-
error_code=error_codes.SNOWML_INVALID_STAGE,
|
329
|
-
original_exception=ValueError(
|
330
|
-
'Invalid path. Expected path to start with "@". Example: @database.schema.stage/optional_path.'
|
331
|
-
),
|
332
|
-
)
|
333
|
-
try:
|
334
|
-
res = identifier.parse_schema_level_object_identifier(path[1:])
|
335
|
-
logging.debug(f"Parsed path: {res}")
|
336
|
-
return _SFFilePath(res[0], res[1], res[2], res[3][1:])
|
337
|
-
except ValueError:
|
338
|
-
raise snowml_exceptions.SnowflakeMLException(
|
339
|
-
error_code=error_codes.SNOWML_INVALID_STAGE,
|
340
|
-
original_exception=ValueError(
|
341
|
-
f"Invalid path. Expected format: @database.schema.stage/optional_path. Getting {path}"
|
342
|
-
),
|
343
|
-
)
|
345
|
+
fsspec.register_implementation(PROTOCOL_NAME, SFFileSystem)
|
@@ -0,0 +1,159 @@
|
|
1
|
+
import collections
|
2
|
+
import logging
|
3
|
+
import re
|
4
|
+
from typing import Any, Dict, Optional
|
5
|
+
|
6
|
+
import fsspec
|
7
|
+
import packaging.version as pkg_version
|
8
|
+
|
9
|
+
from snowflake import snowpark
|
10
|
+
from snowflake.connector import connection
|
11
|
+
from snowflake.ml._internal.exceptions import (
|
12
|
+
error_codes,
|
13
|
+
exceptions as snowml_exceptions,
|
14
|
+
)
|
15
|
+
from snowflake.ml._internal.utils import identifier, snowflake_env
|
16
|
+
from snowflake.ml.fileset import embedded_stage_fs, sfcfs
|
17
|
+
|
18
|
+
PROTOCOL_NAME = "snow"
|
19
|
+
|
20
|
+
_SFFileEntityPath = collections.namedtuple(
|
21
|
+
"_SFFileEntityPath", ["domain", "name", "filepath", "version", "relative_path"]
|
22
|
+
)
|
23
|
+
_PROJECT = "FileSet"
|
24
|
+
_SNOWURL_PATTERN = re.compile(
|
25
|
+
f"({PROTOCOL_NAME}://)?"
|
26
|
+
r"(?<!@)(?P<domain>\w+)/"
|
27
|
+
rf"(?P<name>(?:{identifier._SF_IDENTIFIER}\.){{,2}}{identifier._SF_IDENTIFIER})/"
|
28
|
+
r"(?P<path>versions/(?:(?P<version>[^/]+)(?:/(?P<relpath>.*))?)?)"
|
29
|
+
)
|
30
|
+
|
31
|
+
# FIXME(dhung): Temporary fix for bug in GS version 8.17
|
32
|
+
_BUG_VERSION_MIN = pkg_version.Version("8.17") # Inclusive minimum version with bugged behavior
|
33
|
+
_BUG_VERSION_MAX = pkg_version.Version("8.18") # Exclusive maximum version with bugged behavior
|
34
|
+
|
35
|
+
|
36
|
+
class SnowFileSystem(sfcfs.SFFileSystem):
|
37
|
+
"""A filesystem that allows users to access Snowflake embedded stage files with valid Snowflake locations.
|
38
|
+
|
39
|
+
The file system is based on fsspec (https://filesystem-spec.readthedocs.io/). It is a file system wrapper
|
40
|
+
built on top of SFStageFileSystem. It takes Snowflake embedded stage path as the input and supports read operation.
|
41
|
+
A valid Snowflake location will have the form "snow://{domain}/{entity_name}/versions/{version}/{path_to_file}".
|
42
|
+
|
43
|
+
See `sfcfs.SFFileSystem` documentation for example usage patterns.
|
44
|
+
"""
|
45
|
+
|
46
|
+
protocol = PROTOCOL_NAME
|
47
|
+
_IS_BUGGED_VERSION = None
|
48
|
+
|
49
|
+
def __init__(
|
50
|
+
self,
|
51
|
+
sf_connection: Optional[connection.SnowflakeConnection] = None,
|
52
|
+
snowpark_session: Optional[snowpark.Session] = None,
|
53
|
+
**kwargs: Any,
|
54
|
+
) -> None:
|
55
|
+
super().__init__(sf_connection=sf_connection, snowpark_session=snowpark_session, **kwargs)
|
56
|
+
|
57
|
+
# FIXME(dhung): Temporary fix for bug in GS version 8.17
|
58
|
+
if SnowFileSystem._IS_BUGGED_VERSION is None:
|
59
|
+
try:
|
60
|
+
sf_version = snowflake_env.get_current_snowflake_version(self._session)
|
61
|
+
SnowFileSystem._IS_BUGGED_VERSION = _BUG_VERSION_MIN <= sf_version < _BUG_VERSION_MAX
|
62
|
+
except Exception:
|
63
|
+
SnowFileSystem._IS_BUGGED_VERSION = False
|
64
|
+
|
65
|
+
def info(self, path: str, **kwargs: Any) -> Dict[str, Any]:
|
66
|
+
# FIXME(dhung): Temporary fix for bug in GS version 8.17
|
67
|
+
res: Dict[str, Any] = super().info(path, **kwargs)
|
68
|
+
if res.get("type") == "directory" and not res["name"].endswith("/"):
|
69
|
+
res["name"] += "/"
|
70
|
+
return res
|
71
|
+
|
72
|
+
def _get_stage_fs(
|
73
|
+
self, sf_file_path: _SFFileEntityPath # type: ignore[override]
|
74
|
+
) -> embedded_stage_fs.SFEmbeddedStageFileSystem:
|
75
|
+
"""Get the stage file system for the given snowflake location.
|
76
|
+
|
77
|
+
Args:
|
78
|
+
sf_file_path: The Snowflake path information.
|
79
|
+
|
80
|
+
Returns:
|
81
|
+
A SFEmbeddedStageFileSystem object which supports readonly file operations on Snowflake embedded stages.
|
82
|
+
"""
|
83
|
+
stage_fs_key = (sf_file_path.domain, sf_file_path.name, sf_file_path.version)
|
84
|
+
if stage_fs_key not in self._stage_fs_set:
|
85
|
+
cnt_stage_fs = embedded_stage_fs.SFEmbeddedStageFileSystem(
|
86
|
+
snowpark_session=self._session,
|
87
|
+
domain=sf_file_path.domain,
|
88
|
+
name=sf_file_path.name,
|
89
|
+
**self._kwargs,
|
90
|
+
)
|
91
|
+
self._stage_fs_set[stage_fs_key] = cnt_stage_fs
|
92
|
+
return self._stage_fs_set[stage_fs_key]
|
93
|
+
|
94
|
+
def _stage_path_to_absolute_path(self, stage_fs: embedded_stage_fs.SFEmbeddedStageFileSystem, path: str) -> str:
|
95
|
+
"""Convert the relative path in a stage to an absolute path that starts with the location of the stage."""
|
96
|
+
# Strip protocol from absolute path, since backend needs snow:// prefix to resolve correctly
|
97
|
+
# but fsspec logic strips protocol when doing any searching and globbing
|
98
|
+
stage_name = stage_fs.stage_name
|
99
|
+
protocol = f"{PROTOCOL_NAME}://"
|
100
|
+
if stage_name.startswith(protocol):
|
101
|
+
stage_name = stage_name[len(protocol) :]
|
102
|
+
abs_path = stage_name + "/" + path
|
103
|
+
# FIXME(dhung): Temporary fix for bug in GS version 8.17
|
104
|
+
if self._IS_BUGGED_VERSION:
|
105
|
+
match = _SNOWURL_PATTERN.fullmatch(abs_path)
|
106
|
+
assert match is not None
|
107
|
+
abs_path = abs_path.replace(match.group("relpath"), match.group("relpath").lstrip("/"))
|
108
|
+
return abs_path
|
109
|
+
|
110
|
+
@classmethod
|
111
|
+
def _parse_file_path(cls, path: str) -> _SFFileEntityPath: # type: ignore[override]
|
112
|
+
"""Parse a snowflake location path.
|
113
|
+
|
114
|
+
The following properties will be extracted from the path input:
|
115
|
+
- embedded stage domain
|
116
|
+
- entity name
|
117
|
+
- path (in format `versions/{version}/{relative_path}`)
|
118
|
+
- entity version (optional)
|
119
|
+
- relative file path (optional)
|
120
|
+
|
121
|
+
Args:
|
122
|
+
path: A string in the format of "snow://{domain}/{entity_name}/versions/{version}/{path_to_file}".
|
123
|
+
|
124
|
+
Returns:
|
125
|
+
A namedtuple consisting of the domain, entity name, filepath, version, and relative path, where
|
126
|
+
filepath = "versions/{version}/{relative_path}"
|
127
|
+
|
128
|
+
Raises:
|
129
|
+
SnowflakeMLException: An error occurred when invalid path is given.
|
130
|
+
"""
|
131
|
+
snowurl_match = _SNOWURL_PATTERN.fullmatch(path)
|
132
|
+
if not snowurl_match:
|
133
|
+
raise snowml_exceptions.SnowflakeMLException(
|
134
|
+
error_code=error_codes.SNOWML_INVALID_STAGE,
|
135
|
+
original_exception=ValueError(f"Invalid Snow URL: {path}"),
|
136
|
+
)
|
137
|
+
|
138
|
+
try:
|
139
|
+
domain = snowurl_match.group("domain")
|
140
|
+
parsed_name = identifier.parse_schema_level_object_identifier(snowurl_match.group("name"))
|
141
|
+
name = identifier.get_schema_level_object_identifier(*parsed_name)
|
142
|
+
filepath = snowurl_match.group("path")
|
143
|
+
version = snowurl_match.group("version")
|
144
|
+
relative_path = snowurl_match.group("relpath") or ""
|
145
|
+
logging.debug(f"Parsed snow URL: {snowurl_match.groups()}")
|
146
|
+
# FIXME(dhung): Temporary fix for bug in GS version 8.17
|
147
|
+
if cls._IS_BUGGED_VERSION:
|
148
|
+
filepath = filepath.replace(f"{version}/", f"{version}//")
|
149
|
+
return _SFFileEntityPath(
|
150
|
+
domain=domain, name=name, version=version, relative_path=relative_path, filepath=filepath
|
151
|
+
)
|
152
|
+
except ValueError as e:
|
153
|
+
raise snowml_exceptions.SnowflakeMLException(
|
154
|
+
error_code=error_codes.SNOWML_INVALID_STAGE,
|
155
|
+
original_exception=e,
|
156
|
+
)
|
157
|
+
|
158
|
+
|
159
|
+
fsspec.register_implementation(PROTOCOL_NAME, SnowFileSystem)
|