tracdap-runtime 0.6.0rc1__py3-none-any.whl → 0.6.0rc3__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- tracdap/rt/_impl/data.py +53 -16
- tracdap/rt/_impl/storage.py +92 -27
- tracdap/rt/_plugins/storage_aws.py +158 -142
- tracdap/rt/_plugins/storage_azure.py +155 -0
- tracdap/rt/_plugins/storage_gcp.py +72 -15
- tracdap/rt/_plugins/storage_local.py +11 -6
- tracdap/rt/_version.py +1 -1
- tracdap/rt/config/__init__.py +12 -17
- tracdap/rt/config/common.py +10 -0
- tracdap/rt/config/common_pb2.py +38 -31
- tracdap/rt/config/job_pb2.py +21 -20
- tracdap/rt/config/platform.py +60 -25
- tracdap/rt/config/platform_pb2.py +52 -45
- tracdap/rt/config/result_pb2.py +15 -14
- tracdap/rt/config/runtime.py +0 -1
- tracdap/rt/config/runtime_pb2.py +24 -24
- tracdap/rt/ext/storage.py +2 -2
- tracdap/rt/metadata/__init__.py +20 -20
- tracdap/rt/metadata/common_pb2.py +15 -14
- tracdap/rt/metadata/custom_pb2.py +9 -8
- tracdap/rt/metadata/data_pb2.py +31 -30
- tracdap/rt/metadata/file_pb2.py +9 -8
- tracdap/rt/metadata/flow_pb2.py +33 -32
- tracdap/rt/metadata/job_pb2.py +55 -54
- tracdap/rt/metadata/model_pb2.py +31 -30
- tracdap/rt/metadata/object_id_pb2.py +13 -12
- tracdap/rt/metadata/object_pb2.py +9 -8
- tracdap/rt/metadata/search_pb2.py +19 -18
- tracdap/rt/metadata/stoarge_pb2.py +31 -30
- tracdap/rt/metadata/tag_pb2.py +13 -12
- tracdap/rt/metadata/tag_update_pb2.py +11 -10
- tracdap/rt/metadata/type_pb2.py +29 -28
- {tracdap_runtime-0.6.0rc1.dist-info → tracdap_runtime-0.6.0rc3.dist-info}/METADATA +27 -15
- {tracdap_runtime-0.6.0rc1.dist-info → tracdap_runtime-0.6.0rc3.dist-info}/RECORD +37 -38
- {tracdap_runtime-0.6.0rc1.dist-info → tracdap_runtime-0.6.0rc3.dist-info}/WHEEL +1 -1
- tracdap/rt/config/gateway.py +0 -104
- tracdap/rt/config/gateway_pb2.py +0 -45
- {tracdap_runtime-0.6.0rc1.dist-info → tracdap_runtime-0.6.0rc3.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.6.0rc1.dist-info → tracdap_runtime-0.6.0rc3.dist-info}/top_level.txt +0 -0
tracdap/rt/_plugins/storage_aws.py

```diff
@@ -31,19 +31,19 @@ from . import _helpers

 try:
     # AWS SDK
-    import boto3
-    import botocore.response
-    import botocore.exceptions as aws_ex
-
+    import boto3
+    import botocore.response
+    import botocore.exceptions as aws_ex
+    __aws_available = True
 except ImportError:
-
+    boto3 = None
+    botocore = None
+    aws_ex = None
+    __aws_available = False


 class AwsStorageProvider(IStorageProvider):

-    ARROW_NATIVE_FS_PROPERTY = "arrowNativeFs"
-    ARROW_NATIVE_FS_DEFAULT = False
-
     BUCKET_PROPERTY = "bucket"
     PREFIX_PROPERTY = "prefix"
     REGION_PROPERTY = "region"
```
```diff
@@ -56,6 +56,12 @@ class AwsStorageProvider(IStorageProvider):
     ACCESS_KEY_ID_PROPERTY = "accessKeyId"
     SECRET_ACCESS_KEY_PROPERTY = "secretAccessKey"

+    RUNTIME_FS_PROPERTY = "runtimeFs"
+    RUNTIME_FS_AUTO = "auto"
+    RUNTIME_FS_ARROW = "arrow"
+    RUNTIME_FS_BOTO3 = "boto3"
+    RUNTIME_FS_DEFAULT = RUNTIME_FS_AUTO
+
     ARROW_CLIENT_ARGS = {
         REGION_PROPERTY: "region",
         ENDPOINT_PROPERTY: "endpoint_override",
```
```diff
@@ -75,14 +81,25 @@ class AwsStorageProvider(IStorageProvider):
         self._log = _helpers.logger_for_object(self)
         self._properties = properties

-        self.
-        properties, self.
+        self._runtime_fs = _helpers.get_plugin_property(
+            properties, self.RUNTIME_FS_PROPERTY) \
+            or self.RUNTIME_FS_DEFAULT

     def has_arrow_native(self) -> bool:
-
+        if self._runtime_fs == self.RUNTIME_FS_ARROW:
+            return True
+        elif self._runtime_fs == self.RUNTIME_FS_AUTO:
+            return afs.S3FileSystem is not None
+        else:
+            return False

     def has_file_storage(self) -> bool:
-
+        if self._runtime_fs == self.RUNTIME_FS_BOTO3:
+            return True
+        elif self._runtime_fs == self.RUNTIME_FS_AUTO:
+            return afs.S3FileSystem is None
+        else:
+            return False

     def get_arrow_native(self) -> afs.SubTreeFileSystem:

```
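The new `runtimeFs` property replaces the old `arrowNativeFs` flag and selects between the Arrow-native S3 filesystem and the boto3-backed file storage implementation. A minimal sketch of how the new property might be used, assuming the property keys shown in this diff and a plain properties dict passed to the provider constructor (the bucket and region values are hypothetical):

```python
# Illustrative sketch only: property keys come from the constants in this diff,
# the bucket/region values and the surrounding wiring are assumptions
from tracdap.rt._plugins.storage_aws import AwsStorageProvider

properties = {
    "bucket": "my-trac-data",   # hypothetical bucket
    "region": "eu-west-2",      # hypothetical region
    "runtimeFs": "auto",        # one of "auto", "arrow", "boto3"
}

provider = AwsStorageProvider(properties)

# With runtimeFs = "auto", the Arrow-native S3 filesystem is preferred when
# pyarrow was built with S3 support, otherwise the boto3 implementation is used
if provider.has_arrow_native():
    fs = provider.get_arrow_native()   # afs.SubTreeFileSystem rooted at the bucket/prefix
elif provider.has_file_storage():
    pass  # the provider serves the boto3-based S3ObjectStorage instead
```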
```diff
@@ -161,7 +178,8 @@ class AwsStorageProvider(IStorageProvider):
         raise ex.EStartup(message)


-plugins.PluginManager.register_plugin(IStorageProvider, AwsStorageProvider, ["S3"])
+if __aws_available:
+    plugins.PluginManager.register_plugin(IStorageProvider, AwsStorageProvider, ["S3"])


 # ----------------------------------------------------------------------------------------------------------------------
```
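Registration of the S3 provider is now guarded by the `__aws_available` flag set in the import block above, so the module can be imported even when the optional AWS SDK is missing. A generic sketch of this optional-dependency pattern (the names here are illustrative, not the tracdap plugin API):

```python
# Generic optional-dependency pattern, illustrative only (not the tracdap plugin API)
try:
    import boto3                    # optional SDK, typically provided by an extras feature
    _sdk_available = True
except ImportError:
    boto3 = None
    _sdk_available = False


class S3Backend:

    def __init__(self, bucket: str):
        # Fail at construction time, not at import time, if the SDK is missing
        if not _sdk_available:
            raise RuntimeError("S3 support requires the optional boto3 dependency")
        self._client = boto3.client("s3")
        self._bucket = bucket


# Register the backend only when its SDK imported successfully,
# so importing this module never fails on a missing optional dependency
BACKENDS = {}
if _sdk_available:
    BACKENDS["S3"] = S3Backend
```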
```diff
@@ -172,196 +190,194 @@ plugins.PluginManager.register_plugin(IStorageProvider, AwsStorageProvider, ["S3"])
 # It is likely to be removed in a future release


-
+class S3ObjectStorage(IFileStorage):

-
+    # This is a quick implementation of IFileStorage on S3 using the boto3 AWS SDK

-
+    def __init__(self, config: cfg.PluginConfig, client_args: dict):

-
-
-        self._log = _helpers.logger_for_object(self)
+        self._log = _helpers.logger_for_object(self)

-
-
-
+        self._properties = config.properties
+        self._bucket = _helpers.get_plugin_property(self._properties, AwsStorageProvider.BUCKET_PROPERTY)
+        self._prefix = _helpers.get_plugin_property(self._properties, AwsStorageProvider.PREFIX_PROPERTY) or ""

-
-
-
-
+        if self._bucket is None or len(self._bucket.strip()) == 0:
+            message = f"Missing required config property [{AwsStorageProvider.BUCKET_PROPERTY}] for S3 storage"
+            self._log.error(message)
+            raise ex.EConfigParse(message)

-
+        self._client = boto3.client(**client_args)

-
+    def exists(self, storage_path: str) -> bool:

-
-
+        try:
+            self._log.info(f"EXISTS [{storage_path}]")

-
-
-
+            object_key = self._resolve_path(storage_path)
+            self._client.head_object(Bucket=self._bucket, Key=object_key)
+            return True

-
-
-
-
-
+        except aws_ex.ClientError as error:
+            aws_code = error.response['Error']['Code']
+            if aws_code == str(http.HTTPStatus.NOT_FOUND.value):  # noqa
+                return False
+            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error

-
+    def size(self, storage_path: str) -> int:

-
-
+        try:
+            self._log.info(f"SIZE [{storage_path}]")

-
-
-
+            object_key = self._resolve_path(storage_path)
+            response = self._client.head_object(Bucket=self._bucket, Key=object_key)
+            return response['ContentLength']

-
-
+        except aws_ex.ClientError as error:
+            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error

-
+    def stat(self, storage_path: str) -> FileStat:

-
+        self._log.info(f"STAT [{storage_path}]")

-
+        name = storage_path.split("/")[-1]

-
+        if self.exists(storage_path):

-
-
-
-
+            # Only OBJECTS can support stat atm
+            # Handling for directories needs to be changed, as part of refactor onto object storage
+            size = self.size(storage_path)
+            return FileStat(name, FileType.FILE, storage_path, size)

-
+        else:

-
-
+            self.ls(storage_path)
+            return FileStat(name, FileType.DIRECTORY, storage_path, 0)

-
+    def ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:

-
+        self._log.info(f"LS [{storage_path}]")

-
+        prefix = self._resolve_path(storage_path) + "/"

-
-
-
-
+        response = self._client.list_objects_v2(
+            Bucket=self._bucket,
+            Prefix=prefix,
+            Delimiter="/")

-
+        keys = []

-
-
+        if "Contents" not in response and "CommonPrefixes" not in response:
+            raise ex.EStorageRequest(f"Storage prefix not found: [{storage_path}]")

-
-
-
-
-
-
-
-
-
-
+        if "Contents" in response:
+            for entry in response["Contents"]:
+                raw_key = entry["Key"]
+                if raw_key == prefix:
+                    continue
+                key = raw_key.replace(prefix, "")
+                size = entry["Size"]
+                mtime = entry["LastModified "]
+                stat = FileStat(key, FileType.FILE, raw_key, size, mtime=mtime)
+                keys.append(stat)

-
-
-
-
-
+        if "CommonPrefixes" in response:
+            for raw_prefix in response["CommonPrefixes"]:
+                common_prefix = raw_prefix.replace(prefix, "")
+                stat = FileStat(common_prefix, FileType.DIRECTORY, raw_prefix, 0)
+                keys.append(stat)

-
+        return keys

-
+    def mkdir(self, storage_path: str, recursive: bool = False):

-
+        self._log.info(f"MKDIR [{storage_path}]")

-
-
+        # No-op in object storage
+        pass

-
+    def rm(self, storage_path: str):

-
-
+        try:
+            self._log.info(f"RM [{storage_path}]")

-
-
+            object_key = self._resolve_path(storage_path)
+            self._client.delete_object(Bucket=self._bucket, Key=object_key)

-
-
+        except aws_ex.ClientError as error:
+            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error

-
+    def rmdir(self, storage_path: str):

-
+        raise RuntimeError("RMDIR (recursive) not available for S3 storage")

-
+    def read_bytes(self, storage_path: str) -> bytes:

-
+        self._log.info(f"READ BYTES [{storage_path}]")

-
-
+        body = self._read_impl(storage_path)
+        return body.read()

-
+    def read_byte_stream(self, storage_path: str) -> tp.BinaryIO:

-
+        self._log.info(f"READ BYTE STREAM [{storage_path}]")

-
-
+        data = self.read_bytes(storage_path)
+        return io.BytesIO(data)

-
+    def _read_impl(self, storage_path: str):

-
+        try:

-
-
-
+            object_key = self._resolve_path(storage_path)
+            response = self._client.get_object(Bucket=self._bucket, Key=object_key)
+            return response['Body']

-
-
+        except aws_ex.ClientError as error:
+            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error

-
+    def write_bytes(self, storage_path: str, data: bytes):

-
-
+        try:
+            self._log.info(f"WRITE BYTES [{storage_path}]")

-
+            object_key = self._resolve_path(storage_path)

-
-
-
-
+            self._client.put_object(
+                Bucket=self._bucket,
+                Key=object_key,
+                Body=data)

-
-
+        except aws_ex.ClientError as error:
+            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error

-
+    def write_byte_stream(self, storage_path: str) -> tp.BinaryIO:

-
+        self._log.info(f"WRITE BYTE STREAM [{storage_path}]")

-
+        return self._AwsWriteBuf(self, storage_path)

-
+    class _AwsWriteBuf(io.BytesIO):

-
-
-
-
-
+        def __init__(self, storage, storage_path):
+            super().__init__()
+            self._storage = storage
+            self._storage_path = storage_path
+            self._written = False

-
-
-
-
-
-
+        def close(self):
+            if not self._written:
+                self.seek(0)
+                data = self.read()
+                self._storage.write_bytes(self._storage_path, data)
+                self._written = True

-
+    def _resolve_path(self, storage_path: str) -> str:

-
-
+        if self._prefix is None or self._prefix.strip() == "":
+            return storage_path

-
-
+        separator = "" if self._prefix.endswith("/") else "/"
+        full_path = self._prefix + separator + storage_path

-
+        return full_path[1:] if full_path.startswith("/") else full_path
```
tracdap/rt/_plugins/storage_azure.py (new file)

```diff
@@ -0,0 +1,155 @@
+# Copyright 2023 Accenture Global Solutions Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import typing as tp
+
+# TRAC interfaces
+import tracdap.rt.exceptions as ex
+import tracdap.rt.ext.plugins as plugins
+from tracdap.rt.ext.storage import *
+
+import pyarrow.fs as afs
+
+try:
+    # These dependencies are provided by the optional [azure] feature
+    # For local development, pip install -r requirements_plugins.txt
+    import azure.storage.blob as az_blob  # noqa
+    import adlfs  # noqa
+    __azure_available = True
+except ImportError:
+    adlfs = None
+    __azure_available = False
+
+# Set of common helpers across the core plugins (do not reference rt._impl)
+from . import _helpers
+
+
+class AzureBlobStorageProvider(IStorageProvider):
+
+    # This client depends on the Azure fsspec implementation, since there is no native implementation from Arrow
+    # To enable it, the tracdap package must be installed with the optional [azure] feature
+
+    # Current supported authentication mechanisms are "default" and "access_key"
+    # Client always uses location mode = primary, version aware = False
+
+    STORAGE_ACCOUNT_PROPERTY = "storageAccount"
+    CONTAINER_PROPERTY = "container"
+    PREFIX_PROPERTY = "prefix"
+
+    CREDENTIALS_PROPERTY = "credentials"
+    CREDENTIALS_DEFAULT = "default"
+    CREDENTIALS_ACCESS_KEY = "access_key"
+
+    ACCESS_KEY_PROPERTY = "accessKey"
+
+    RUNTIME_FS_PROPERTY = "runtimeFs"
+    RUNTIME_FS_AUTO = "auto"
+    RUNTIME_FS_FSSPEC = "fsspec"
+    RUNTIME_FS_DEFAULT = RUNTIME_FS_AUTO
+
+    def __init__(self, properties: tp.Dict[str, str]):
+
+        self._log = _helpers.logger_for_object(self)
+        self._properties = properties
+
+        self._runtime_fs = _helpers.get_plugin_property(
+            properties, self.RUNTIME_FS_PROPERTY) \
+            or self.RUNTIME_FS_DEFAULT
+
+        # The Azure SDK is very verbose with logging
+        # Avoid log noise by raising the log level for the Azure namespace
+        azure_log = _helpers.logger_for_namespace("azure.core")
+        azure_log.level = logging.WARNING
+
+    def has_arrow_native(self) -> bool:
+        return True
+
+    def get_arrow_native(self) -> afs.SubTreeFileSystem:
+
+        if self._runtime_fs == self.RUNTIME_FS_AUTO or self._runtime_fs == self.RUNTIME_FS_FSSPEC:
+            azure_fs = self.create_fsspec()
+        else:
+            message = f"Requested runtime FS [{self._runtime_fs}] is not available for Azure storage"
+            self._log.error(message)
+            raise ex.EStartup(message)
+
+        container = _helpers.get_plugin_property(self._properties, self.CONTAINER_PROPERTY)
+        prefix = _helpers.get_plugin_property(self._properties, self.PREFIX_PROPERTY)
+
+        if container is None or container.strip() == "":
+            message = f"Missing required config property [{self.CONTAINER_PROPERTY}] for Azure blob storage"
+            self._log.error(message)
+            raise ex.EConfigParse(message)
+
+        root_path = f"{container}/{prefix}" if prefix else container
+
+        return afs.SubTreeFileSystem(root_path, azure_fs)
+
+    def create_fsspec(self) -> afs.FileSystem:
+
+        azure_fsspec_args = self.setup_client_args()
+        azure_fsspec = adlfs.AzureBlobFileSystem(**azure_fsspec_args)
+
+        return afs.PyFileSystem(afs.FSSpecHandler(azure_fsspec))
+
+    def setup_client_args(self) -> tp.Dict[str, tp.Any]:
+
+        client_args = dict()
+
+        storage_account = _helpers.get_plugin_property(self._properties, self.STORAGE_ACCOUNT_PROPERTY)
+
+        if storage_account is None or len(storage_account.strip()) == 0:
+            message = f"Missing required config property [{self.STORAGE_ACCOUNT_PROPERTY}] for Azure blob storage"
+            self._log.error(message)
+            raise ex.EConfigParse(message)
+
+        client_args["account_name"] = storage_account
+
+        credentials = self.setup_credentials()
+        client_args.update(credentials)
+
+        return client_args
+
+    def setup_credentials(self):
+
+        # Only default (Google ADC) mechanism is supported
+        # Arrow GCP FS does also support access tokens, but ADC is probably all we ever need
+
+        mechanism = _helpers.get_plugin_property(self._properties, self.CREDENTIALS_PROPERTY)
+
+        if mechanism is None or len(mechanism) == 0 or mechanism.lower() == self.CREDENTIALS_DEFAULT:
+            self._log.info(f"Using [{self.CREDENTIALS_DEFAULT}] credentials mechanism")
+            return {"anon": False}
+
+        if mechanism == self.CREDENTIALS_ACCESS_KEY:
+
+            self._log.info(f"Using [{self.CREDENTIALS_ACCESS_KEY}] credentials mechanism")
+
+            access_key = _helpers.get_plugin_property(self._properties, self.ACCESS_KEY_PROPERTY)
+
+            if access_key is None or len(access_key.strip()) == 0:
+                message = f"Missing required config property [{self.ACCESS_KEY_PROPERTY}] for Azure blob storage"
+                raise ex.EConfigParse(message)
+
+            return {"account_key": access_key}
+
+        message = f"Unrecognised credentials mechanism: [{mechanism}]"
+        self._log.error(message)
+        raise ex.EStartup(message)
+
+
+# Only register the plugin if the [azure] feature is available
+if __azure_available:
+    plugins.PluginManager.register_plugin(IStorageProvider, AzureBlobStorageProvider, ["BLOB"])
```