tracdap-runtime 0.5.30__py3-none-any.whl → 0.6.0.dev1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in the public registry.
- tracdap/rt/_exec/dev_mode.py +2 -1
- tracdap/rt/_impl/data.py +1 -28
- tracdap/rt/_impl/static_api.py +5 -1
- tracdap/rt/_impl/storage.py +586 -10
- tracdap/rt/_impl/util.py +24 -3
- tracdap/rt/_plugins/_helpers.py +26 -25
- tracdap/rt/_plugins/storage_aws.py +162 -76
- tracdap/rt/_plugins/storage_azure.py +155 -0
- tracdap/rt/_plugins/storage_gcp.py +183 -0
- tracdap/rt/_plugins/storage_local.py +249 -98
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/static_api.py +2 -1
- tracdap/rt/config/__init__.py +8 -13
- tracdap/rt/config/common.py +10 -0
- tracdap/rt/config/common_pb2.py +38 -31
- tracdap/rt/config/job_pb2.py +21 -20
- tracdap/rt/config/platform.py +60 -25
- tracdap/rt/config/platform_pb2.py +52 -45
- tracdap/rt/config/result_pb2.py +15 -14
- tracdap/rt/config/runtime.py +0 -1
- tracdap/rt/config/runtime_pb2.py +24 -24
- tracdap/rt/exceptions.py +9 -0
- tracdap/rt/ext/plugins.py +0 -12
- tracdap/rt/ext/storage.py +47 -29
- tracdap/rt/metadata/common_pb2.py +15 -14
- tracdap/rt/metadata/custom_pb2.py +9 -8
- tracdap/rt/metadata/data_pb2.py +31 -30
- tracdap/rt/metadata/file_pb2.py +9 -8
- tracdap/rt/metadata/flow_pb2.py +33 -32
- tracdap/rt/metadata/job_pb2.py +55 -54
- tracdap/rt/metadata/model_pb2.py +31 -30
- tracdap/rt/metadata/object_id_pb2.py +13 -12
- tracdap/rt/metadata/object_pb2.py +9 -8
- tracdap/rt/metadata/search_pb2.py +19 -18
- tracdap/rt/metadata/stoarge_pb2.py +31 -30
- tracdap/rt/metadata/tag_pb2.py +13 -12
- tracdap/rt/metadata/tag_update_pb2.py +11 -10
- tracdap/rt/metadata/type_pb2.py +29 -28
- {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dev1.dist-info}/METADATA +26 -15
- {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dev1.dist-info}/RECORD +43 -43
- tracdap/rt/config/gateway.py +0 -104
- tracdap/rt/config/gateway_pb2.py +0 -45
- {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dev1.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dev1.dist-info}/WHEEL +0 -0
- {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dev1.dist-info}/top_level.txt +0 -0
tracdap/rt/_impl/util.py
CHANGED
@@ -145,6 +145,28 @@ def logger_for_namespace(namespace: str) -> logging.Logger:
     return logging.getLogger(namespace)
 
 
+def format_file_size(size: int) -> str:
+
+    if size < 1024:
+        if size == 0:
+            return "0 bytes"
+        elif size == 1:
+            return "1 byte"
+        else:
+            return f"{size} bytes"
+
+    if size < 1024 ** 2:
+        kb = size / 1024
+        return f"{kb:.1f} KB"
+
+    if size < 1024 ** 3:
+        mb = size / (1024 ** 2)
+        return f"{mb:.1f} MB"
+
+    gb = size / (1024 ** 3)
+    return f"{gb:.1f} GB"
+
+
 def new_object_id(object_type: meta.ObjectType) -> meta.TagHeader:
 
     timestamp = dt.datetime.utcnow()
@@ -296,7 +318,7 @@ def error_details_from_exception(error: Exception):
 
 def filter_model_stack_trace(full_stack: tb.StackSummary, checkout_directory: pathlib.Path):
 
-    frame_names = list(map(lambda
+    frame_names = list(map(lambda frame_: frame_.name, full_stack))
 
     if __FIRST_MODEL_FRAME_NAME in frame_names:
         first_model_frame = frame_names.index(__FIRST_MODEL_FRAME_NAME)
@@ -309,7 +331,7 @@ def filter_model_stack_trace(full_stack: tb.StackSummary, checkout_directory: pa
 
     for frame_index, frame in enumerate(full_stack[first_model_frame:]):
         module_path = pathlib.Path(frame.filename)
-        if
+        if "tracdap" in module_path.parts:
             tracdap_index = len(module_path.parts) - 1 - list(reversed(module_path.parts)).index("tracdap")
             if tracdap_index < len(module_path.parts)-1:
                 if module_path.parts[tracdap_index+1] == "rt":
@@ -322,4 +344,3 @@ def filter_model_stack_trace(full_stack: tb.StackSummary, checkout_directory: pa
         last_model_frame = first_model_frame + frame_index
 
     return full_stack[first_model_frame:last_model_frame+1]
-
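For reference, a quick sketch of how the new format_file_size helper behaves, based on the logic added above. This is illustrative only; the function lives in the internal module tracdap.rt._impl.util, so the import path shown here is not a stable public API.

from tracdap.rt._impl.util import format_file_size

print(format_file_size(0))              # "0 bytes"
print(format_file_size(1))              # "1 byte"
print(format_file_size(512))            # "512 bytes"
print(format_file_size(2048))           # "2.0 KB"
print(format_file_size(5 * 1024 ** 2))  # "5.0 MB"
print(format_file_size(3 * 1024 ** 3))  # "3.0 GB"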
tracdap/rt/_plugins/_helpers.py
CHANGED
@@ -22,6 +22,8 @@ import platform
 import urllib.parse
 import typing as tp
 
+import tracdap.rt.exceptions as _ex
+
 
 def get_plugin_property(properties: tp.Dict[str, str], property_name: str):
 
@@ -40,6 +42,30 @@ def get_plugin_property(properties: tp.Dict[str, str], property_name: str):
     return None
 
 
+def get_plugin_property_boolean(properties: tp.Dict[str, str], property_name: str, property_default: bool = False):
+
+    property_value = get_plugin_property(properties, property_name)
+
+    if property_value is None:
+        return property_default
+
+    if isinstance(property_value, bool):
+        return property_value
+
+    if isinstance(property_value, str):
+
+        if len(property_value.strip()) == 0:
+            return property_default
+
+        if property_value.strip().lower() == "true":
+            return True
+
+        if property_value.strip().lower() == "false":
+            return False
+
+    raise _ex.EConfigParse(f"Invalid value for [{property_name}]: Expected a boolean value, got [{property_value}]")
+
+
 # Handling for credentials supplied via HTTP(S) URLs
 
 __HTTP_TOKEN_KEY = "token"
@@ -98,31 +124,6 @@ def apply_http_credentials(url: urllib.parse.ParseResult, credentials: str) -> u
 
 # Logging helpers
 
-_T = tp.TypeVar("_T")
-
-
-class _LogClose(tp.Generic[_T]):
-
-    def __init__(self, ctx_mgr: _T, log, msg):
-        self.__ctx_mgr = ctx_mgr
-        self.__log = log
-        self.__msg = msg
-
-    def __getitem__(self, item):
-        return self.__ctx_mgr.__getitem__(item)
-
-    def __enter__(self):
-        return self.__ctx_mgr.__enter__()
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        self.__ctx_mgr.__exit__(exc_type, exc_val, exc_tb)
-        self.__log.info(self.__msg)
-
-
-def log_close(ctx_mgg: _T, log: logging.Logger, msg: str) -> _T:
-
-    return _LogClose(ctx_mgg, log, msg)
-
 
 def log_safe(param: tp.Any):
 
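A minimal sketch of how the new get_plugin_property_boolean helper resolves values, based on the logic added above. The property names used here are made up for illustration, and _helpers is an internal module, so the import is shown only for the example.

from tracdap.rt._plugins._helpers import get_plugin_property_boolean

props = {"useSsl": "True", "verbose": "  ", "fastMode": "maybe"}

get_plugin_property_boolean(props, "useSsl")          # "True" (any case) -> True
get_plugin_property_boolean(props, "verbose", True)   # blank value -> default (True)
get_plugin_property_boolean(props, "missing")         # missing key -> default (False)
get_plugin_property_boolean(props, "fastMode")        # unrecognised value -> raises EConfigParse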
tracdap/rt/_plugins/storage_aws.py
CHANGED
@@ -23,31 +23,26 @@ import tracdap.rt.exceptions as ex
 import tracdap.rt.ext.plugins as plugins
 from tracdap.rt.ext.storage import *
 
+from pyarrow import fs as afs
+
 # Set of common helpers across the core plugins (do not reference rt._impl)
 from . import _helpers
 
-# TODO: Remove dependencies on internal implementation details
-import tracdap.rt._impl.storage as _storage
-
-# AWS SDK
-import boto3
-import botocore.response  # noqa
-import botocore.exceptions as aws_ex  # noqa
-
-
-class S3ObjectStorage(IFileStorage):
-
-    # This is a quick implementation of IFileStorage on S3 using the boto3 AWS SDK
 
-
-    #
-
+try:
+    # AWS SDK
+    import boto3
+    import botocore.response
+    import botocore.exceptions as aws_ex
+    __aws_available = True
+except ImportError:
+    boto3 = None
+    botocore = None
+    aws_ex = None
+    __aws_available = False
 
-# TODO: Switch to using Apache Arrow file system interface
-# Arrow already has implementations for AWS, GCP, HDFS and local files
-# The arrow interface also allows extension with fsspec, to support Azure blob storage or custom implementations
 
-
+class AwsStorageProvider(IStorageProvider):
 
     BUCKET_PROPERTY = "bucket"
     PREFIX_PROPERTY = "prefix"
@@ -61,38 +56,106 @@ class S3ObjectStorage(IFileStorage):
     ACCESS_KEY_ID_PROPERTY = "accessKeyId"
     SECRET_ACCESS_KEY_PROPERTY = "secretAccessKey"
 
-
+    RUNTIME_FS_PROPERTY = "runtimeFs"
+    RUNTIME_FS_AUTO = "auto"
+    RUNTIME_FS_ARROW = "arrow"
+    RUNTIME_FS_BOTO3 = "boto3"
+    RUNTIME_FS_DEFAULT = RUNTIME_FS_AUTO
+
+    ARROW_CLIENT_ARGS = {
+        REGION_PROPERTY: "region",
+        ENDPOINT_PROPERTY: "endpoint_override",
+        ACCESS_KEY_ID_PROPERTY: "access_key",
+        SECRET_ACCESS_KEY_PROPERTY: "secret_key"
+    }
+
+    BOTO_CLIENT_ARGS = {
+        REGION_PROPERTY: "region_name",
+        ENDPOINT_PROPERTY: "endpoint_url",
+        ACCESS_KEY_ID_PROPERTY: "aws_access_key_id",
+        SECRET_ACCESS_KEY_PROPERTY: "aws_secret_access_key"
+    }
+
+    def __init__(self, properties: tp.Dict[str, str]):
 
         self._log = _helpers.logger_for_object(self)
+        self._properties = properties
 
-        self.
+        self._runtime_fs = _helpers.get_plugin_property(
+            properties, self.RUNTIME_FS_PROPERTY) \
+            or self.RUNTIME_FS_DEFAULT
+
+    def has_arrow_native(self) -> bool:
+        if self._runtime_fs == self.RUNTIME_FS_ARROW:
+            return True
+        elif self._runtime_fs == self.RUNTIME_FS_AUTO:
+            return afs.S3FileSystem is not None
+        else:
+            return False
+
+    def has_file_storage(self) -> bool:
+        if self._runtime_fs == self.RUNTIME_FS_BOTO3:
+            return True
+        elif self._runtime_fs == self.RUNTIME_FS_AUTO:
+            return afs.S3FileSystem is None
+        else:
+            return False
 
-
-        self._prefix = _helpers.get_plugin_property(self._properties, self.PREFIX_PROPERTY) or ""
-        self._region = _helpers.get_plugin_property(self._properties, self.REGION_PROPERTY)
-        self._endpoint = _helpers.get_plugin_property(self._properties, self.ENDPOINT_PROPERTY)
+    def get_arrow_native(self) -> afs.SubTreeFileSystem:
 
-
+        s3fs_args = self.setup_client_args(self.ARROW_CLIENT_ARGS)
+        s3fs = afs.S3FileSystem(**s3fs_args)
 
-
-
-            **credentials_params}
+        bucket = _helpers.get_plugin_property(self._properties, self.BUCKET_PROPERTY)
+        prefix = _helpers.get_plugin_property(self._properties, self.PREFIX_PROPERTY)
 
-        if
-
+        if bucket is None or len(bucket.strip()) == 0:
+            message = f"Missing required config property [{self.BUCKET_PROPERTY}] for S3 storage"
+            self._log.error(message)
+            raise ex.EConfigParse(message)
 
-        if
-            client_args["endpoint_url"] = self._endpoint
+        root_path = f"{bucket}/{prefix}" if prefix else bucket
 
-
+        return afs.SubTreeFileSystem(root_path, s3fs)
+
+    def get_file_storage(self) -> IFileStorage:
+
+        client_args = self.setup_client_args(self.BOTO_CLIENT_ARGS)
+        client_args["service_name"] = "s3"
+
+        config = cfg.PluginConfig()
+        config.protocol = "S3"
+        config.properties = self._properties
+
+        return S3ObjectStorage(config, client_args)
+
+    def setup_client_args(self, key_mapping: tp.Dict[str, str]) -> tp.Dict[str, tp.Any]:
 
-
+        client_args = dict()
 
-
+        region = _helpers.get_plugin_property(self._properties, self.REGION_PROPERTY)
+        endpoint = _helpers.get_plugin_property(self._properties, self.ENDPOINT_PROPERTY)
 
-        if
+        if region is not None:
+            region_key = key_mapping[self.REGION_PROPERTY]
+            client_args[region_key] = region
+
+        if endpoint is not None:
+            endpoint_key = key_mapping[self.ENDPOINT_PROPERTY]
+            client_args[endpoint_key] = endpoint
+
+        credentials = self.setup_credentials(key_mapping)
+        client_args.update(credentials)
+
+        return client_args
+
+    def setup_credentials(self, key_mapping: tp.Dict[str, str]):
+
+        mechanism = _helpers.get_plugin_property(self._properties, self.CREDENTIALS_PROPERTY)
+
+        if mechanism is None or len(mechanism) == 0 or mechanism.lower() == self.CREDENTIALS_DEFAULT:
             self._log.info(f"Using [{self.CREDENTIALS_DEFAULT}] credentials mechanism")
-            return
+            return dict()
 
         if mechanism.lower() == self.CREDENTIALS_STATIC:
 
@@ -103,12 +166,49 @@ class S3ObjectStorage(IFileStorage):
                 f"Using [{self.CREDENTIALS_STATIC}] credentials mechanism, " +
                 f"access key id = [{access_key_id}]")
 
-
+            access_key_id_arg = key_mapping[self.ACCESS_KEY_ID_PROPERTY]
+            secret_access_key_arg = key_mapping[self.SECRET_ACCESS_KEY_PROPERTY]
+
+            return {
+                access_key_id_arg: access_key_id,
+                secret_access_key_arg: secret_access_key}
 
         message = f"Unrecognised credentials mechanism: [{mechanism}]"
         self._log.error(message)
         raise ex.EStartup(message)
 
+
+if __aws_available:
+    plugins.PluginManager.register_plugin(IStorageProvider, AwsStorageProvider, ["S3"])
+
+
+# ----------------------------------------------------------------------------------------------------------------------
+# CUSTOM IMPLEMENTATION FOR S3 STORAGE
+# ----------------------------------------------------------------------------------------------------------------------
+
+# This is the old implementation that was used before Arrow native was made available
+# It is likely to be removed in a future release
+
+
+class S3ObjectStorage(IFileStorage):
+
+    # This is a quick implementation of IFileStorage on S3 using the boto3 AWS SDK
+
+    def __init__(self, config: cfg.PluginConfig, client_args: dict):
+
+        self._log = _helpers.logger_for_object(self)
+
+        self._properties = config.properties
+        self._bucket = _helpers.get_plugin_property(self._properties, AwsStorageProvider.BUCKET_PROPERTY)
+        self._prefix = _helpers.get_plugin_property(self._properties, AwsStorageProvider.PREFIX_PROPERTY) or ""
+
+        if self._bucket is None or len(self._bucket.strip()) == 0:
+            message = f"Missing required config property [{AwsStorageProvider.BUCKET_PROPERTY}] for S3 storage"
+            self._log.error(message)
+            raise ex.EConfigParse(message)
+
+        self._client = boto3.client(**client_args)
+
     def exists(self, storage_path: str) -> bool:
 
         try:
@@ -140,19 +240,21 @@ class S3ObjectStorage(IFileStorage):
 
         self._log.info(f"STAT [{storage_path}]")
 
+        name = storage_path.split("/")[-1]
+
        if self.exists(storage_path):
 
            # Only OBJECTS can support stat atm
            # Handling for directories needs to be changed, as part of refactor onto object storage
            size = self.size(storage_path)
-           return FileStat(FileType.FILE, size)
+           return FileStat(name, FileType.FILE, storage_path, size)
 
        else:
 
            self.ls(storage_path)
-           return FileStat(FileType.DIRECTORY, 0)
+           return FileStat(name, FileType.DIRECTORY, storage_path, 0)
 
-    def ls(self, storage_path: str) -> tp.List[
+    def ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:
 
         self._log.info(f"LS [{storage_path}]")
 
@@ -174,36 +276,41 @@ class S3ObjectStorage(IFileStorage):
                 if raw_key == prefix:
                     continue
                 key = raw_key.replace(prefix, "")
-
+                size = entry["Size"]
+                mtime = entry["LastModified"]
+                stat = FileStat(key, FileType.FILE, raw_key, size, mtime=mtime)
+                keys.append(stat)
 
             if "CommonPrefixes" in response:
                 for raw_prefix in response["CommonPrefixes"]:
                     common_prefix = raw_prefix.replace(prefix, "")
-
+                    stat = FileStat(common_prefix, FileType.DIRECTORY, raw_prefix, 0)
+                    keys.append(stat)
 
             return keys
 
-    def mkdir(self, storage_path: str, recursive: bool = False
+    def mkdir(self, storage_path: str, recursive: bool = False):
 
         self._log.info(f"MKDIR [{storage_path}]")
 
         # No-op in object storage
         pass
 
-    def rm(self, storage_path: str
+    def rm(self, storage_path: str):
 
         try:
             self._log.info(f"RM [{storage_path}]")
 
-            if recursive:
-                raise RuntimeError("RM (recursive) not available for S3 storage")
-
             object_key = self._resolve_path(storage_path)
             self._client.delete_object(Bucket=self._bucket, Key=object_key)
 
         except aws_ex.ClientError as error:
             raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
+    def rmdir(self, storage_path: str):
+
+        raise RuntimeError("RMDIR (recursive) not available for S3 storage")
+
     def read_bytes(self, storage_path: str) -> bytes:
 
         self._log.info(f"READ BYTES [{storage_path}]")
@@ -218,7 +325,7 @@ class S3ObjectStorage(IFileStorage):
         data = self.read_bytes(storage_path)
         return io.BytesIO(data)
 
-    def _read_impl(self, storage_path: str)
+    def _read_impl(self, storage_path: str):
 
         try:
 
@@ -229,7 +336,7 @@ class S3ObjectStorage(IFileStorage):
         except aws_ex.ClientError as error:
             raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
-    def write_bytes(self, storage_path: str, data: bytes
+    def write_bytes(self, storage_path: str, data: bytes):
 
         try:
             self._log.info(f"WRITE BYTES [{storage_path}]")
@@ -244,43 +351,27 @@ class S3ObjectStorage(IFileStorage):
         except aws_ex.ClientError as error:
             raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
-    def write_byte_stream(self, storage_path: str
+    def write_byte_stream(self, storage_path: str) -> tp.BinaryIO:
 
         self._log.info(f"WRITE BYTE STREAM [{storage_path}]")
 
-        return self._AwsWriteBuf(self, storage_path
+        return self._AwsWriteBuf(self, storage_path)
 
     class _AwsWriteBuf(io.BytesIO):
 
-        def __init__(self, storage, storage_path
+        def __init__(self, storage, storage_path):
             super().__init__()
             self._storage = storage
             self._storage_path = storage_path
-            self._overwrite = overwrite
             self._written = False
 
         def close(self):
             if not self._written:
                 self.seek(0)
                 data = self.read()
-                self._storage.write_bytes(self._storage_path, data
+                self._storage.write_bytes(self._storage_path, data)
                 self._written = True
 
-    # TODO: These methods can be removed from the interface, they are not needed
-    # (storage layer only needs to work in binary mode)
-
-    def read_text(self, storage_path: str, encoding: str = 'utf-8') -> str:
-        raise RuntimeError("READ (text mode) not available for S3 storage")
-
-    def read_text_stream(self, storage_path: str, encoding: str = 'utf-8') -> tp.TextIO:
-        raise RuntimeError("READ (text mode) not available for S3 storage")
-
-    def write_text(self, storage_path: str, data: str, encoding: str = 'utf-8', overwrite: bool = False):
-        raise RuntimeError("WRITE (text mode) not available for S3 storage")
-
-    def write_text_stream(self, storage_path: str, encoding: str = 'utf-8', overwrite: bool = False) -> tp.TextIO:
-        raise RuntimeError("WRITE (text mode) not available for S3 storage")
-
     def _resolve_path(self, storage_path: str) -> str:
 
         if self._prefix is None or self._prefix.strip() == "":
@@ -290,8 +381,3 @@ class S3ObjectStorage(IFileStorage):
         full_path = self._prefix + separator + storage_path
 
         return full_path[1:] if full_path.startswith("/") else full_path
-
-
-# Register the S3 storage plugin
-
-_storage.StorageManager.register_storage_type("S3", S3ObjectStorage, _storage.CommonDataStorage)
tracdap/rt/_plugins/storage_azure.py
ADDED
@@ -0,0 +1,155 @@
+# Copyright 2023 Accenture Global Solutions Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import typing as tp
+
+# TRAC interfaces
+import tracdap.rt.exceptions as ex
+import tracdap.rt.ext.plugins as plugins
+from tracdap.rt.ext.storage import *
+
+import pyarrow.fs as afs
+
+try:
+    # These dependencies are provided by the optional [azure] feature
+    # For local development, pip install -r requirements_plugins.txt
+    import azure.storage.blob as az_blob  # noqa
+    import adlfs  # noqa
+    __azure_available = True
+except ImportError:
+    adlfs = None
+    __azure_available = False
+
+# Set of common helpers across the core plugins (do not reference rt._impl)
+from . import _helpers
+
+
+class AzureBlobStorageProvider(IStorageProvider):
+
+    # This client depends on the Azure fsspec implementation, since there is no native implementation from Arrow
+    # To enable it, the tracdap package must be installed with the optional [azure] feature
+
+    # Current supported authentication mechanisms are "default" and "access_key"
+    # Client always uses location mode = primary, version aware = False
+
+    STORAGE_ACCOUNT_PROPERTY = "storageAccount"
+    CONTAINER_PROPERTY = "container"
+    PREFIX_PROPERTY = "prefix"
+
+    CREDENTIALS_PROPERTY = "credentials"
+    CREDENTIALS_DEFAULT = "default"
+    CREDENTIALS_ACCESS_KEY = "access_key"
+
+    ACCESS_KEY_PROPERTY = "accessKey"
+
+    RUNTIME_FS_PROPERTY = "runtimeFs"
+    RUNTIME_FS_AUTO = "auto"
+    RUNTIME_FS_FSSPEC = "fsspec"
+    RUNTIME_FS_DEFAULT = RUNTIME_FS_AUTO
+
+    def __init__(self, properties: tp.Dict[str, str]):
+
+        self._log = _helpers.logger_for_object(self)
+        self._properties = properties
+
+        self._runtime_fs = _helpers.get_plugin_property(
+            properties, self.RUNTIME_FS_PROPERTY) \
+            or self.RUNTIME_FS_DEFAULT
+
+        # The Azure SDK is very verbose with logging
+        # Avoid log noise by raising the log level for the Azure namespace
+        azure_log = _helpers.logger_for_namespace("azure.core")
+        azure_log.level = logging.WARNING
+
+    def has_arrow_native(self) -> bool:
+        return True
+
+    def get_arrow_native(self) -> afs.SubTreeFileSystem:
+
+        if self._runtime_fs == self.RUNTIME_FS_AUTO or self._runtime_fs == self.RUNTIME_FS_FSSPEC:
+            azure_fs = self.create_fsspec()
+        else:
+            message = f"Requested runtime FS [{self._runtime_fs}] is not available for Azure storage"
+            self._log.error(message)
+            raise ex.EStartup(message)
+
+        container = _helpers.get_plugin_property(self._properties, self.CONTAINER_PROPERTY)
+        prefix = _helpers.get_plugin_property(self._properties, self.PREFIX_PROPERTY)
+
+        if container is None or container.strip() == "":
+            message = f"Missing required config property [{self.CONTAINER_PROPERTY}] for Azure blob storage"
+            self._log.error(message)
+            raise ex.EConfigParse(message)
+
+        root_path = f"{container}/{prefix}" if prefix else container
+
+        return afs.SubTreeFileSystem(root_path, azure_fs)
+
+    def create_fsspec(self) -> afs.FileSystem:
+
+        azure_fsspec_args = self.setup_client_args()
+        azure_fsspec = adlfs.AzureBlobFileSystem(**azure_fsspec_args)
+
+        return afs.PyFileSystem(afs.FSSpecHandler(azure_fsspec))
+
+    def setup_client_args(self) -> tp.Dict[str, tp.Any]:
+
+        client_args = dict()
+
+        storage_account = _helpers.get_plugin_property(self._properties, self.STORAGE_ACCOUNT_PROPERTY)
+
+        if storage_account is None or len(storage_account.strip()) == 0:
+            message = f"Missing required config property [{self.STORAGE_ACCOUNT_PROPERTY}] for Azure blob storage"
+            self._log.error(message)
+            raise ex.EConfigParse(message)
+
+        client_args["account_name"] = storage_account
+
+        credentials = self.setup_credentials()
+        client_args.update(credentials)
+
+        return client_args
+
+    def setup_credentials(self):
+
+        # Only default (Google ADC) mechanism is supported
+        # Arrow GCP FS does also support access tokens, but ADC is probably all we ever need
+
+        mechanism = _helpers.get_plugin_property(self._properties, self.CREDENTIALS_PROPERTY)
+
+        if mechanism is None or len(mechanism) == 0 or mechanism.lower() == self.CREDENTIALS_DEFAULT:
+            self._log.info(f"Using [{self.CREDENTIALS_DEFAULT}] credentials mechanism")
+            return {"anon": False}
+
+        if mechanism == self.CREDENTIALS_ACCESS_KEY:
+
+            self._log.info(f"Using [{self.CREDENTIALS_ACCESS_KEY}] credentials mechanism")
+
+            access_key = _helpers.get_plugin_property(self._properties, self.ACCESS_KEY_PROPERTY)
+
+            if access_key is None or len(access_key.strip()) == 0:
+                message = f"Missing required config property [{self.ACCESS_KEY_PROPERTY}] for Azure blob storage"
+                raise ex.EConfigParse(message)
+
+            return {"account_key": access_key}
+
+        message = f"Unrecognised credentials mechanism: [{mechanism}]"
+        self._log.error(message)
+        raise ex.EStartup(message)
+
+
+# Only register the plugin if the [azure] feature is available
+if __azure_available:
+    plugins.PluginManager.register_plugin(IStorageProvider, AzureBlobStorageProvider, ["BLOB"])