tracdap-runtime 0.5.30__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as published to its public registry. It is provided for informational purposes only.
- tracdap/rt/_exec/dev_mode.py +2 -1
- tracdap/rt/_impl/data.py +1 -28
- tracdap/rt/_impl/static_api.py +5 -1
- tracdap/rt/_impl/storage.py +586 -10
- tracdap/rt/_impl/util.py +24 -3
- tracdap/rt/_plugins/_helpers.py +26 -25
- tracdap/rt/_plugins/storage_aws.py +162 -76
- tracdap/rt/_plugins/storage_azure.py +155 -0
- tracdap/rt/_plugins/storage_gcp.py +183 -0
- tracdap/rt/_plugins/storage_local.py +249 -98
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/static_api.py +2 -1
- tracdap/rt/config/__init__.py +8 -13
- tracdap/rt/config/common.py +10 -0
- tracdap/rt/config/common_pb2.py +38 -31
- tracdap/rt/config/job_pb2.py +21 -20
- tracdap/rt/config/platform.py +60 -25
- tracdap/rt/config/platform_pb2.py +52 -45
- tracdap/rt/config/result_pb2.py +15 -14
- tracdap/rt/config/runtime.py +0 -1
- tracdap/rt/config/runtime_pb2.py +24 -24
- tracdap/rt/exceptions.py +9 -0
- tracdap/rt/ext/plugins.py +0 -12
- tracdap/rt/ext/storage.py +47 -29
- tracdap/rt/metadata/common_pb2.py +15 -14
- tracdap/rt/metadata/custom_pb2.py +9 -8
- tracdap/rt/metadata/data_pb2.py +31 -30
- tracdap/rt/metadata/file_pb2.py +9 -8
- tracdap/rt/metadata/flow_pb2.py +33 -32
- tracdap/rt/metadata/job_pb2.py +55 -54
- tracdap/rt/metadata/model_pb2.py +31 -30
- tracdap/rt/metadata/object_id_pb2.py +13 -12
- tracdap/rt/metadata/object_pb2.py +9 -8
- tracdap/rt/metadata/search_pb2.py +19 -18
- tracdap/rt/metadata/stoarge_pb2.py +31 -30
- tracdap/rt/metadata/tag_pb2.py +13 -12
- tracdap/rt/metadata/tag_update_pb2.py +11 -10
- tracdap/rt/metadata/type_pb2.py +29 -28
- {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dist-info}/METADATA +26 -15
- {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dist-info}/RECORD +43 -43
- tracdap/rt/config/gateway.py +0 -104
- tracdap/rt/config/gateway_pb2.py +0 -45
- {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dist-info}/WHEEL +0 -0
- {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dist-info}/top_level.txt +0 -0
```diff
--- /dev/null
+++ tracdap/rt/_plugins/storage_gcp.py
@@ -0,0 +1,183 @@
+# Copyright 2023 Accenture Global Solutions Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing as tp
+import datetime as dt
+
+import tracdap.rt.exceptions as ex
+
+# Import storage interfaces
+import tracdap.rt.ext.plugins as plugins
+from tracdap.rt.ext.storage import *
+
+from pyarrow import fs as pa_fs
+
+# Set of common helpers across the core plugins (do not reference rt._impl)
+from . import _helpers
+
+
+try:
+    # These dependencies are provided by the optional [gcp] feature
+    # For local development, pip install -r requirements_plugins.txt
+    import google.cloud.storage as gcs  # noqa
+    import gcsfs  # noqa
+    __gcp_available = True
+except ImportError:
+    gcs = None
+    gcsfs = None
+    __gcp_available = False
+
+
+class GcpStorageProvider(IStorageProvider):
+
+    BUCKET_PROPERTY = "bucket"
+    PREFIX_PROPERTY = "prefix"
+    REGION_PROPERTY = "region"
+    ENDPOINT_PROPERTY = "endpoint"
+
+    CREDENTIALS_PROPERTY = "credentials"
+    CREDENTIALS_ADC = "adc"
+    CREDENTIALS_ACCESS_TOKEN = "access_token"
+
+    ACCESS_TOKEN = "accessToken"
+    ACCESS_TOKEN_EXPIRY = "accessTokenExpiry"
+    ACCESS_TOKEN_EXPIRY_DEFAULT = 3600
+
+    RUNTIME_FS_PROPERTY = "runtimeFs"
+    RUNTIME_FS_AUTO = "auto"
+    RUNTIME_FS_ARROW = "arrow"
+    RUNTIME_FS_FSSPEC = "fsspec"
+    RUNTIME_FS_DEFAULT = RUNTIME_FS_AUTO
+
+    ARROW_CLIENT_ARGS = {
+        REGION_PROPERTY: "default_bucket_location",
+        ENDPOINT_PROPERTY: "endpoint_override"
+    }
+
+    FSSPEC_CLIENT_ARGS = {
+        REGION_PROPERTY: "default_location",
+        ENDPOINT_PROPERTY: "endpoint_url"
+    }
+
+    try:
+        __arrow_available = pa_fs.GcsFileSystem is not None
+    except ImportError:
+        __arrow_available = False
+
+    def __init__(self, properties: tp.Dict[str, str]):
+
+        self._log = _helpers.logger_for_object(self)
+        self._properties = properties
+
+        self._runtime_fs = _helpers.get_plugin_property(
+            properties, self.RUNTIME_FS_PROPERTY) \
+            or self.RUNTIME_FS_DEFAULT
+
+    def has_arrow_native(self) -> bool:
+        return True
+
+    def get_arrow_native(self) -> pa_fs.SubTreeFileSystem:
+
+        if self._runtime_fs == self.RUNTIME_FS_AUTO:
+            gcs_fs = self.create_arrow() if self.__arrow_available else self.create_fsspec()
+        elif self._runtime_fs == self.RUNTIME_FS_ARROW:
+            gcs_fs = self.create_arrow()
+        elif self._runtime_fs == self.RUNTIME_FS_FSSPEC:
+            gcs_fs = self.create_fsspec()
+        else:
+            message = f"Requested runtime FS [{self._runtime_fs}] is not available for GCP storage"
+            self._log.error(message)
+            raise ex.EStartup(message)
+
+        bucket = _helpers.get_plugin_property(self._properties, self.BUCKET_PROPERTY)
+        prefix = _helpers.get_plugin_property(self._properties, self.PREFIX_PROPERTY)
+
+        if bucket is None or len(bucket.strip()) == 0:
+            message = f"Missing required config property [{self.BUCKET_PROPERTY}] for GCP storage"
+            self._log.error(message)
+            raise ex.EConfigParse(message)
+
+        root_path = f"{bucket}/{prefix}" if prefix else bucket
+
+        return pa_fs.SubTreeFileSystem(root_path, gcs_fs)
+
+    def create_arrow(self) -> pa_fs.FileSystem:
+
+        gcs_arrow_args = self.setup_client_args(self.ARROW_CLIENT_ARGS)
+
+        return pa_fs.GcsFileSystem(**gcs_arrow_args)
+
+    def create_fsspec(self) -> pa_fs.FileSystem:
+
+        gcs_fsspec_args = self.setup_client_args(self.FSSPEC_CLIENT_ARGS)
+        gcs_fsspec = gcsfs.GCSFileSystem(**gcs_fsspec_args)
+
+        return pa_fs.PyFileSystem(pa_fs.FSSpecHandler(gcs_fsspec))
+
+    def setup_client_args(self, arg_mapping: tp.Dict[str, str]) -> tp.Dict[str, tp.Any]:
+
+        client_args = dict()
+
+        region = _helpers.get_plugin_property(self._properties, self.REGION_PROPERTY)
+        endpoint = _helpers.get_plugin_property(self._properties, self.ENDPOINT_PROPERTY)
+
+        if region is not None:
+            region_key = arg_mapping[self.REGION_PROPERTY]
+            client_args[region_key] = region
+
+        if endpoint is not None:
+            endpoint_key = arg_mapping[self.ENDPOINT_PROPERTY]
+            client_args[endpoint_key] = endpoint
+
+        credentials = self.setup_credentials()
+        client_args.update(credentials)
+
+        return client_args
+
+    def setup_credentials(self):
+
+        # Only default (Google ADC) mechanism is supported
+        # Arrow GCP FS does also support access tokens, but ADC is probably all we ever need
+
+        mechanism = _helpers.get_plugin_property(self._properties, self.CREDENTIALS_PROPERTY)
+
+        if mechanism is None or len(mechanism) == 0 or mechanism.lower() == self.CREDENTIALS_ADC:
+            self._log.info(f"Using [{self.CREDENTIALS_ADC}] credentials mechanism")
+            return dict()
+
+        if mechanism == self.CREDENTIALS_ACCESS_TOKEN:
+
+            self._log.info(f"Using [{self.CREDENTIALS_ACCESS_TOKEN}] credentials mechanism")
+
+            access_token = _helpers.get_plugin_property(self._properties, self.ACCESS_TOKEN)
+            access_token_expiry = _helpers.get_plugin_property(self._properties, self.ACCESS_TOKEN_EXPIRY)
+
+            if access_token is None or len(access_token.strip()) == 0:
+                message = f"Missing required config property [{self.ACCESS_TOKEN}] for GCP storage"
+                raise ex.EConfigParse(message)
+
+            if access_token_expiry is None:
+                access_token_expiry = self.ACCESS_TOKEN_EXPIRY_DEFAULT
+
+            expiry_timestamp = dt.datetime.now(dt.timezone.utc) + dt.timedelta(seconds=float(access_token_expiry))
+
+            return {"access_token": access_token, "credential_token_expiration": expiry_timestamp}
+
+        message = f"Unrecognised credentials mechanism: [{mechanism}]"
+        self._log.error(message)
+        raise ex.EStartup(message)
+
+
+if __gcp_available:
+    plugins.PluginManager.register_plugin(IStorageProvider, GcpStorageProvider, ["GCS"])
```
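The new provider can be exercised on its own, outside the TRAC runtime. Below is a minimal sketch (not taken from the package docs): the bucket and prefix values are hypothetical placeholders, and running it needs the optional GCP dependencies plus valid Google Application Default Credentials.

```python
# Hedged sketch: driving the new GCP provider directly. Property keys come from
# the constants in the diff above; the bucket / prefix values are hypothetical.
from tracdap.rt._plugins.storage_gcp import GcpStorageProvider

properties = {
    "bucket": "example-bucket",    # required - a missing bucket raises EConfigParse
    "prefix": "data/landing",      # optional - joined onto the bucket root
    "credentials": "adc",          # default mechanism (Google ADC)
    "runtimeFs": "auto",           # "auto" | "arrow" | "fsspec"
}

provider = GcpStorageProvider(properties)

# Returns a pyarrow SubTreeFileSystem rooted at "example-bucket/data/landing"
fs = provider.get_arrow_native()
print(fs.get_file_info("some_file.csv"))
```

With `credentials` unset or set to `adc`, `setup_credentials()` returns an empty dict and the client falls back to Application Default Credentials; the `access_token` mechanism passes a token and expiry timestamp instead, as the code above shows.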
```diff
--- tracdap/rt/_plugins/storage_local.py
+++ tracdap/rt/_plugins/storage_local.py
@@ -13,6 +13,9 @@
 # limitations under the License.
 
 import datetime as dt
+import os
+import re
+import sys
 import typing as tp
 import pathlib
 
```
```diff
@@ -23,27 +26,61 @@ import tracdap.rt.exceptions as ex
 import tracdap.rt.ext.plugins as plugins
 from tracdap.rt.ext.storage import *
 
+import pyarrow.fs as afs
+
 # Set of common helpers across the core plugins (do not reference rt._impl)
 from . import _helpers
 
-# TODO: Remove dependencies on internal implementation details
-import tracdap.rt._impl.storage as _storage
-
 
-class LocalFileStorage(IFileStorage):
+class LocalStorageProvider(IStorageProvider):
 
     ROOT_PATH_PROPERTY = "rootPath"
 
-    def __init__(self, config: cfg.PluginConfig, options: dict = None):
+    RUNTIME_FS_PROPERTY = "runtimeFs"
+    RUNTIME_FS_AUTO = "auto"
+    RUNTIME_FS_ARROW = "arrow"
+    RUNTIME_FS_PYTHON = "python"
+    RUNTIME_FS_DEFAULT = RUNTIME_FS_AUTO
+
+    def __init__(self, properties: tp.Dict[str, str]):
 
         self._log = _helpers.logger_for_object(self)
-        self._properties = config.properties
+        self._properties = properties
+
+        self._root_path = self.check_root_path(self._properties, self._log)
+
+        self._runtime_fs = _helpers.get_plugin_property(
+            properties, self.RUNTIME_FS_PROPERTY) \
+            or self.RUNTIME_FS_DEFAULT
+
+    def has_arrow_native(self) -> bool:
+        return self._runtime_fs in [self.RUNTIME_FS_ARROW, self.RUNTIME_FS_AUTO]
+
+    def has_file_storage(self) -> bool:
+        return self._runtime_fs == self.RUNTIME_FS_PYTHON
 
-
+    def get_arrow_native(self) -> afs.SubTreeFileSystem:
+        root_fs = afs.LocalFileSystem()
+        return afs.SubTreeFileSystem(str(self._root_path), root_fs)
+
+    def get_file_storage(self) -> IFileStorage:
+
+        config = cfg.PluginConfig()
+        config.protocol = "LOCAL"
+        config.properties = self._properties
+
+        options = dict()
+
+        return LocalFileStorage(config, options)
+
+    @classmethod
+    def check_root_path(cls, properties, log):
+
+        root_path_config = _helpers.get_plugin_property(properties, cls.ROOT_PATH_PROPERTY)
 
         if not root_path_config or root_path_config.isspace():
             err = f"Storage root path not set"
-
+            log.error(err)
             raise ex.EStorageRequest(err)
 
         supplied_root = pathlib.Path(root_path_config)
```
```diff
@@ -53,17 +90,58 @@ class LocalFileStorage(IFileStorage):
 
         else:
             err = f"Relative path not allowed for storage root [{supplied_root}]"
-
+            log.error(err)
             raise ex.EStorageConfig(err)
 
         try:
-
+            return absolute_root.resolve(strict=True)
 
         except FileNotFoundError as e:
             err = f"Storage root path does not exist: [{absolute_root}]"
-
+            log.error(err)
             raise ex.EStorageRequest(err) from e
 
+
+plugins.PluginManager.register_plugin(IStorageProvider, LocalStorageProvider, ["LOCAL", "file"])
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+# CUSTOM IMPLEMENTATION FOR LOCAL STORAGE
+# ----------------------------------------------------------------------------------------------------------------------
+
+# This is the old implementation that was used before Arrow native was made available
+# It is likely to be removed in a future release
+
+
+class _StreamResource(tp.BinaryIO):  # noqa
+
+    def __init__(self, ctx_mgr, close_func):
+        self.__ctx_mgr = ctx_mgr
+        self.__close_func = close_func
+
+    def __getitem__(self, item):
+        return self.__ctx_mgr.__getitem__(item)
+
+    def __enter__(self):
+        return self.__ctx_mgr.__enter__()
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        try:
+            self.__close_func()
+        finally:
+            self.__ctx_mgr.__exit__(exc_type, exc_val, exc_tb)
+
+
+class LocalFileStorage(IFileStorage):
+
+    def __init__(self, config: cfg.PluginConfig, options: dict = None):
+
+        self._log = _helpers.logger_for_object(self)
+        self._properties = config.properties
+        self._options = options  # Not used
+
+        self._root_path = LocalStorageProvider.check_root_path(self._properties, self._log)
+
     def _get_root(self):
         return self._root_path
 
```
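The reworked LOCAL provider follows the same pattern as the cloud providers. A hedged sketch, assuming a throwaway temp directory for `rootPath` (which `check_root_path` requires to be an absolute path that already exists):

```python
# Hedged sketch: the new LOCAL storage provider in isolation.
import tempfile

from tracdap.rt._plugins.storage_local import LocalStorageProvider

root = tempfile.mkdtemp()   # placeholder root - must be absolute and exist

provider = LocalStorageProvider({"rootPath": root, "runtimeFs": "auto"})

assert provider.has_arrow_native()        # true for "arrow" and "auto"
assert not provider.has_file_storage()    # only true when runtimeFs == "python"

fs = provider.get_arrow_native()          # pyarrow SubTreeFileSystem over root
```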
```diff
@@ -74,13 +152,26 @@ class LocalFileStorage(IFileStorage):
 
     def _exists(self, storage_path: str) -> bool:
 
-        item_path = self._root_path / storage_path
+        item_path = self._resolve_path(storage_path, "EXISTS", True)
         return item_path.exists()
 
     def size(self, storage_path: str) -> int:
 
         operation = f"SIZE [{storage_path}]"
-        return self._error_handling(operation, lambda: self.
+        return self._error_handling(operation, lambda: self._size(storage_path))
+
+    def _size(self, storage_path: str) -> int:
+
+        item_path = self._resolve_path(storage_path, "SIZE", True)
+
+        if not item_path.exists():
+            raise ex.EStorageRequest(f"Storage path does not exist: SIZE [{storage_path}]")
+
+        if not item_path.is_file():
+            raise ex.EStorageRequest(f"Storage path is not a file: SIZE [{storage_path}]")
+
+        os_stat = item_path.stat()
+        return os_stat.st_size
 
     def stat(self, storage_path: str) -> FileStat:
 
```
```diff
@@ -89,63 +180,100 @@ class LocalFileStorage(IFileStorage):
 
     def _stat(self, storage_path: str) -> FileStat:
 
-        item_path = self._root_path / storage_path
+        item_path = self._resolve_path(storage_path, "STAT", True)
         os_stat = item_path.stat()
 
+        return self._os_to_trac_stat(item_path, os_stat)
+
+    def _os_to_trac_stat(self, item_path: pathlib.Path, os_stat: os.stat_result):
+
+        file_name = "." if item_path == self._root_path else item_path.name
+
         file_type = FileType.FILE if item_path.is_file() \
             else FileType.DIRECTORY if item_path.is_dir() \
             else None
 
+        file_size = 0 if file_type is FileType.DIRECTORY else os_stat.st_size
+
         return FileStat(
+            file_name=file_name,
             file_type=file_type,
-
-
+            storage_path=str(item_path.relative_to(self._root_path).as_posix()),
+            size=file_size,
             mtime=dt.datetime.fromtimestamp(os_stat.st_mtime, dt.timezone.utc),
-            atime=dt.datetime.fromtimestamp(os_stat.st_atime, dt.timezone.utc)
-            uid=os_stat.st_uid,
-            gid=os_stat.st_gid,
-            mode=os_stat.st_mode)
+            atime=dt.datetime.fromtimestamp(os_stat.st_atime, dt.timezone.utc))
 
-    def ls(self, storage_path: str) -> tp.List[str]:
+    def ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:
 
         operation = f"LS [{storage_path}]"
-        return self._error_handling(operation, lambda: self._ls(storage_path))
+        return self._error_handling(operation, lambda: self._ls(storage_path, recursive))
 
-    def _ls(self, storage_path: str) -> tp.List[str]:
+    def _ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:
 
-        item_path = self._root_path / storage_path
-        return [str(x.relative_to(item_path))
-                for x in item_path.iterdir()
-                if x.is_file() or x.is_dir()]
+        item_path = self._resolve_path(storage_path, "LS", True)
 
-
+        if not item_path.exists():
+            raise ex.EStorageRequest(f"Storage path does not exist: LS [{storage_path}]")
+
+        # If LS is called on anything other than a directory, return a listing of that one item
+        if not item_path.is_dir():
+            os_stat = item_path.stat()
+            stat = self._os_to_trac_stat(item_path, os_stat)
+            return [stat]
+
+        # Otherwise do a regular directory listing
+        else:
+            pattern = "**/*" if recursive else "*"
+            paths = list(item_path.glob(pattern))
+            return list(map(lambda p: self._os_to_trac_stat(p, p.stat()), paths))
+
+    def mkdir(self, storage_path: str, recursive: bool = False):
 
         operation = f"MKDIR [{storage_path}]"
-        self._error_handling(operation, lambda: self._mkdir(storage_path, recursive
+        self._error_handling(operation, lambda: self._mkdir(storage_path, recursive))
 
-    def _mkdir(self, storage_path: str, recursive: bool = False
+    def _mkdir(self, storage_path: str, recursive: bool = False):
 
-        item_path = self._root_path / storage_path
-        item_path.mkdir(parents=recursive, exist_ok=
+        item_path = self._resolve_path(storage_path, "MKDIR", False)
+        item_path.mkdir(parents=recursive, exist_ok=True)
 
-    def rm(self, storage_path: str
+    def rm(self, storage_path: str):
 
-        operation = f"
-        self._error_handling(operation, lambda: self._rm(storage_path
+        operation = f"RM [{storage_path}]"
+        self._error_handling(operation, lambda: self._rm(storage_path))
+
+    def _rm(self, storage_path: str):
+
+        item_path = self._resolve_path(storage_path, "RM", False)
+
+        if not item_path.is_file():
+            raise ex.EStorageRequest(f"Storage path is not a file: RM [{storage_path}]")
+
+        item_path.unlink()
+
+    def rmdir(self, storage_path: str):
+
+        operation = f"RMDIR [{storage_path}]"
+        self._error_handling(operation, lambda: self._rmdir(storage_path))
 
-    def 
+    def _rmdir(self, storage_path: str):
 
-
+        item_path = self._resolve_path(storage_path, "RMDIR", False)
 
-
+        if not item_path.is_dir():
+            raise ex.EStorageRequest(f"Storage path is not a directory: RMDIR [{storage_path}]")
 
-
-        return self._error_handling(operation, lambda: self._read_bytes(storage_path))
+        self._rmdir_inner(item_path)
 
-    def 
+    def _rmdir_inner(self, item_path):
 
-
-
+        for item in item_path.iterdir():
+            if item.is_dir():
+                self._rmdir_inner(item)
+            else:
+                item.unlink()
+
+        item_path.rmdir()
 
     def read_byte_stream(self, storage_path: str) -> tp.BinaryIO:
 
```
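Note the behavioural change in the listing API: `ls()` now returns `FileStat` records rather than bare path strings, and `recursive=True` switches the glob pattern from `"*"` to `"**/*"`. A hedged sketch, using the `"python"` runtime FS so that `get_file_storage()` is available:

```python
# Hedged sketch: FileStat-based listings from the old-style file storage.
import tempfile

from tracdap.rt._plugins.storage_local import LocalStorageProvider

provider = LocalStorageProvider(
    {"rootPath": tempfile.mkdtemp(), "runtimeFs": "python"})

storage = provider.get_file_storage()
storage.mkdir("inputs/2023", recursive=True)

# With recursive=True the listing globs "**/*", so "inputs" and "inputs/2023"
# both come back as FileStat records with name, type and size populated
for entry in storage.ls(".", recursive=True):
    print(entry.file_name, entry.file_type, entry.size)
```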
```diff
@@ -154,90 +282,97 @@ class LocalFileStorage(IFileStorage):
 
     def _read_byte_stream(self, storage_path: str) -> tp.BinaryIO:
 
-
-        item_path = self._root_path / storage_path
-        stream = open(item_path, mode='rb')
+        item_path = self._resolve_path(storage_path, "OPEN BYTE STREAM (READ)", False)
 
-
+        # Do not try to open directories or other non-file objects for reading
+        if item_path.exists() and not item_path.is_file():
+            raise ex.EStorageRequest(f"Storage path is not a file: OPEN BYTE STREAM (READ) [{storage_path}]")
 
-
+        stream = open(item_path, mode='rb')
 
-
-        self._error_handling(operation, lambda: self._write_bytes(storage_path, data, overwrite))
+        return _StreamResource(stream, lambda: self._close_byte_stream(storage_path, stream))
 
-    def 
+    def write_byte_stream(self, storage_path: str) -> tp.BinaryIO:
 
-
-
+        operation = f"OPEN BYTE STREAM (WRITE) [{storage_path}]"
+        return self._error_handling(operation, lambda: self._write_byte_stream(storage_path))
 
-    def 
+    def _write_byte_stream(self, storage_path: str) -> tp.BinaryIO:
 
-
-        return self._error_handling(operation, lambda: self._write_byte_stream(storage_path, overwrite))
+        item_path = self._resolve_path(storage_path, "OPEN BYTE STREAM (WRITE)", False)
 
-
+        # Make sure the parent dir always exists
+        # This brings local storage in line with cloud bucket semantics for writing objects
+        if not item_path.parent.exists():
+            item_path.parent.mkdir(parents=True, exist_ok=True)
 
-
-        item_path
+        # Do not try to open directories or other non-file objects for write
+        if item_path.exists() and not item_path.is_file():
+            raise ex.EStorageRequest(f"Storage path is not a file: OPEN BYTE STREAM (WRITE) [{storage_path}]")
 
-
-
-        else:
-            stream = open(item_path, mode='xb')
+        # If the file does not already exist and there is an error, try to clean it up
+        delete_on_error = not item_path.exists()
 
-
+        # Always overwrite existing files, this is in line with cloud bucket semantics
+        stream = open(item_path, mode='wb')
 
-
+        return _StreamResource(stream, lambda: self._close_byte_stream(storage_path, stream, delete_on_error))
 
-
-        return self._error_handling(operation, lambda: self._read_text(storage_path, encoding))
+    def _close_byte_stream(self, storage_path: str, stream: tp.BinaryIO, delete_on_error: bool = False):
 
-
+        if stream.closed:
+            return
 
-
-
+        try:
+            read_write = "WRITE" if stream.writable() else "READ"
+            self._log.info(f"CLOSE BYTE STREAM ({read_write}) [{storage_path}]")
 
-
+        finally:
+            stream.close()
 
-
-
+        exc_info = sys.exc_info()
+        error = exc_info[1] if exc_info is not None else None
 
-
+        if error is not None and delete_on_error:
+            try:
+                item_path = self._resolve_path(storage_path, "CLOSE BYTE STREAM (WRITE)", False)
+                if item_path.exists():
+                    item_path.unlink()
+            except OSError:
+                pass
 
-
-        item_path = self._root_path / storage_path
-        stream = open(item_path, mode='rt', encoding=encoding)
+    __T = tp.TypeVar("__T")
 
-
+    def _resolve_path(self, storage_path: str, operation_name: str, allow_root_dir: bool) -> pathlib.Path:
 
-
+        try:
 
-
-
+            if storage_path is None or len(storage_path.strip()) == 0:
+                raise ex.EStorageValidation(f"Storage path is null or blank: {operation_name} [{storage_path}]")
 
-
+            if self._ILLEGAL_PATH_CHARS.match(storage_path):
+                raise ex.EStorageValidation(f"Storage path is invalid: {operation_name} [{storage_path}]")
 
-
-            stream.write(data)
+            relative_path = pathlib.Path(storage_path)
 
-
+            if relative_path.is_absolute():
+                raise ex.EStorageValidation(f"Storage path is not relative: {operation_name} [{storage_path}]")
 
-
-
+            root_path = self._root_path
+            absolute_path = self._root_path.joinpath(relative_path).resolve(False)
 
-
+            # is_relative_to only supported in Python 3.9+, we need to support 3.7
+            if absolute_path != root_path and root_path not in absolute_path.parents:
+                raise ex.EStorageValidation(f"Path is outside storage root: {operation_name} [{storage_path}]")
 
-
-
+            if absolute_path == root_path and not allow_root_dir:
+                raise ex.EStorageValidation(f"Illegal operation for storage root: {operation_name} [{storage_path}]")
 
-
-            stream = open(item_path, mode='wt', encoding=encoding)
-        else:
-            stream = open(item_path, mode='xt', encoding=encoding)
+            return absolute_path
 
-
+        except ValueError as e:
 
-
+            raise ex.EStorageValidation(f"Storage path is invalid: {operation_name} [{storage_path}]") from e
 
     def _error_handling(self, operation: str, func: tp.Callable[[], __T]) -> __T:
 
```
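The new `_resolve_path` guard backs every operation above: blank paths, absolute paths, illegal characters and anything that resolves outside the storage root are rejected with `EStorageValidation` before the filesystem is touched. A hedged illustration (the file name is hypothetical):

```python
# Hedged sketch: path traversal is rejected by _resolve_path before any I/O.
import tempfile

import tracdap.rt.exceptions as ex
from tracdap.rt._plugins.storage_local import LocalStorageProvider

storage = LocalStorageProvider(
    {"rootPath": tempfile.mkdtemp(), "runtimeFs": "python"}).get_file_storage()

try:
    storage.size("../outside.txt")    # resolves above the storage root
except ex.EStorageValidation:
    print("rejected, as expected")
```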
```diff
@@ -245,6 +380,11 @@ class LocalFileStorage(IFileStorage):
             self._log.info(operation)
             return func()
 
+        # ETrac means the error is already handled, log the message as-is
+        except ex.ETrac as e:
+            self._log.exception(f"{operation} {str(e)}")
+            raise
+
         except FileNotFoundError as e:
             msg = "File not found"
             self._log.exception(f"{operation}: {msg}")
```
```diff
@@ -255,6 +395,16 @@ class LocalFileStorage(IFileStorage):
             self._log.exception(f"{operation}: {msg}")
             raise ex.EStorageRequest(msg) from e
 
+        except IsADirectoryError as e:
+            msg = "Path is a directory, not a file"
+            self._log.exception(f"{operation}: {msg}")
+            raise ex.EStorageRequest(msg) from e
+
+        except NotADirectoryError as e:
+            msg = "Path is not a directory"
+            self._log.exception(f"{operation}: {msg}")
+            raise ex.EStorageRequest(msg) from e
+
         except PermissionError as e:
             msg = "Access denied"
             self._log.exception(f"{operation}: {msg}")
```
```diff
@@ -265,5 +415,6 @@ class LocalFileStorage(IFileStorage):
             self._log.exception(f"{operation}: {msg}")
             raise ex.EStorageAccess(msg) from e
 
-
-
+    _ILLEGAL_PATH_CHARS_WINDOWS = re.compile(r".*[\x00<>:\"\'|?*].*")
+    _ILLEGAL_PATH_CHARS_POSIX = re.compile(r".*[\x00<>:\"\'|?*\\].*")
+    _ILLEGAL_PATH_CHARS = _ILLEGAL_PATH_CHARS_WINDOWS if _helpers.is_windows() else _ILLEGAL_PATH_CHARS_POSIX
```
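For reference, the platform-specific character classes at the end of the file behave like this (a standalone reproduction of the constants above; the POSIX variant additionally rejects backslashes):

```python
import re

# Copied from the diff above: characters that are never legal in a storage path
ILLEGAL_WINDOWS = re.compile(r".*[\x00<>:\"\'|?*].*")
ILLEGAL_POSIX = re.compile(r".*[\x00<>:\"\'|?*\\].*")

assert ILLEGAL_POSIX.match("bad<file>.csv")             # '<' is illegal everywhere
assert ILLEGAL_POSIX.match("windows\\style\\path.csv")  # backslash, POSIX only
assert not ILLEGAL_WINDOWS.match("windows\\style\\path.csv")
assert not ILLEGAL_POSIX.match("inputs/2023/good_file.csv")
```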
|