tracdap-runtime 0.6.0rc1__py3-none-any.whl → 0.6.0rc3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to the supported public registries. It is provided for informational purposes only.
Files changed (39)
  1. tracdap/rt/_impl/data.py +53 -16
  2. tracdap/rt/_impl/storage.py +92 -27
  3. tracdap/rt/_plugins/storage_aws.py +158 -142
  4. tracdap/rt/_plugins/storage_azure.py +155 -0
  5. tracdap/rt/_plugins/storage_gcp.py +72 -15
  6. tracdap/rt/_plugins/storage_local.py +11 -6
  7. tracdap/rt/_version.py +1 -1
  8. tracdap/rt/config/__init__.py +12 -17
  9. tracdap/rt/config/common.py +10 -0
  10. tracdap/rt/config/common_pb2.py +38 -31
  11. tracdap/rt/config/job_pb2.py +21 -20
  12. tracdap/rt/config/platform.py +60 -25
  13. tracdap/rt/config/platform_pb2.py +52 -45
  14. tracdap/rt/config/result_pb2.py +15 -14
  15. tracdap/rt/config/runtime.py +0 -1
  16. tracdap/rt/config/runtime_pb2.py +24 -24
  17. tracdap/rt/ext/storage.py +2 -2
  18. tracdap/rt/metadata/__init__.py +20 -20
  19. tracdap/rt/metadata/common_pb2.py +15 -14
  20. tracdap/rt/metadata/custom_pb2.py +9 -8
  21. tracdap/rt/metadata/data_pb2.py +31 -30
  22. tracdap/rt/metadata/file_pb2.py +9 -8
  23. tracdap/rt/metadata/flow_pb2.py +33 -32
  24. tracdap/rt/metadata/job_pb2.py +55 -54
  25. tracdap/rt/metadata/model_pb2.py +31 -30
  26. tracdap/rt/metadata/object_id_pb2.py +13 -12
  27. tracdap/rt/metadata/object_pb2.py +9 -8
  28. tracdap/rt/metadata/search_pb2.py +19 -18
  29. tracdap/rt/metadata/stoarge_pb2.py +31 -30
  30. tracdap/rt/metadata/tag_pb2.py +13 -12
  31. tracdap/rt/metadata/tag_update_pb2.py +11 -10
  32. tracdap/rt/metadata/type_pb2.py +29 -28
  33. {tracdap_runtime-0.6.0rc1.dist-info → tracdap_runtime-0.6.0rc3.dist-info}/METADATA +27 -15
  34. {tracdap_runtime-0.6.0rc1.dist-info → tracdap_runtime-0.6.0rc3.dist-info}/RECORD +37 -38
  35. {tracdap_runtime-0.6.0rc1.dist-info → tracdap_runtime-0.6.0rc3.dist-info}/WHEEL +1 -1
  36. tracdap/rt/config/gateway.py +0 -104
  37. tracdap/rt/config/gateway_pb2.py +0 -45
  38. {tracdap_runtime-0.6.0rc1.dist-info → tracdap_runtime-0.6.0rc3.dist-info}/LICENSE +0 -0
  39. {tracdap_runtime-0.6.0rc1.dist-info → tracdap_runtime-0.6.0rc3.dist-info}/top_level.txt +0 -0
tracdap/rt/_plugins/storage_aws.py

@@ -31,19 +31,19 @@ from . import _helpers
 
 try:
     # AWS SDK
-    import boto3  # noqa
-    import botocore.response  # noqa
-    import botocore.exceptions as aws_ex  # noqa
-    boto_available = True
+    import boto3
+    import botocore.response
+    import botocore.exceptions as aws_ex
+    __aws_available = True
 except ImportError:
-    boto_available = False
+    boto3 = None
+    botocore = None
+    aws_ex = None
+    __aws_available = False
 
 
 class AwsStorageProvider(IStorageProvider):
 
-    ARROW_NATIVE_FS_PROPERTY = "arrowNativeFs"
-    ARROW_NATIVE_FS_DEFAULT = False
-
     BUCKET_PROPERTY = "bucket"
     PREFIX_PROPERTY = "prefix"
     REGION_PROPERTY = "region"
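
Aside: the hunk above moves the AWS plugin onto an optional-dependency guard where the SDK names are bound to None when the import fails, so later module-level references stay defined and a single availability flag controls registration. A minimal, self-contained sketch of that pattern (generic names, not the tracdap code itself):

# Generic sketch of the optional-dependency guard used above (illustrative names only)
try:
    import boto3                          # optional SDK, may not be installed
    import botocore.exceptions as aws_ex
    _sdk_available = True
except ImportError:
    boto3 = None                          # keep the names defined so later references
    aws_ex = None                         # fail cleanly instead of raising NameError
    _sdk_available = False

def make_client(**client_args):
    # Callers check the flag before touching the SDK
    if not _sdk_available:
        raise RuntimeError("AWS SDK (boto3) is not installed - install the optional feature to enable S3 storage")
    return boto3.client("s3", **client_args)
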
@@ -56,6 +56,12 @@ class AwsStorageProvider(IStorageProvider):
     ACCESS_KEY_ID_PROPERTY = "accessKeyId"
     SECRET_ACCESS_KEY_PROPERTY = "secretAccessKey"
 
+    RUNTIME_FS_PROPERTY = "runtimeFs"
+    RUNTIME_FS_AUTO = "auto"
+    RUNTIME_FS_ARROW = "arrow"
+    RUNTIME_FS_BOTO3 = "boto3"
+    RUNTIME_FS_DEFAULT = RUNTIME_FS_AUTO
+
     ARROW_CLIENT_ARGS = {
         REGION_PROPERTY: "region",
         ENDPOINT_PROPERTY: "endpoint_override",
@@ -75,14 +81,25 @@ class AwsStorageProvider(IStorageProvider):
         self._log = _helpers.logger_for_object(self)
         self._properties = properties
 
-        self._arrow_native = _helpers.get_plugin_property_boolean(
-            properties, self.ARROW_NATIVE_FS_PROPERTY, self.ARROW_NATIVE_FS_DEFAULT)
+        self._runtime_fs = _helpers.get_plugin_property(
+            properties, self.RUNTIME_FS_PROPERTY) \
+            or self.RUNTIME_FS_DEFAULT
 
     def has_arrow_native(self) -> bool:
-        return True if self._arrow_native else False
+        if self._runtime_fs == self.RUNTIME_FS_ARROW:
+            return True
+        elif self._runtime_fs == self.RUNTIME_FS_AUTO:
+            return afs.S3FileSystem is not None
+        else:
+            return False
 
     def has_file_storage(self) -> bool:
-        return True if not self._arrow_native and boto_available else False
+        if self._runtime_fs == self.RUNTIME_FS_BOTO3:
+            return True
+        elif self._runtime_fs == self.RUNTIME_FS_AUTO:
+            return afs.S3FileSystem is None
+        else:
+            return False
 
     def get_arrow_native(self) -> afs.SubTreeFileSystem:
 
@@ -161,7 +178,8 @@ class AwsStorageProvider(IStorageProvider):
             raise ex.EStartup(message)
 
 
-plugins.PluginManager.register_plugin(IStorageProvider, AwsStorageProvider, ["S3"])
+if __aws_available:
+    plugins.PluginManager.register_plugin(IStorageProvider, AwsStorageProvider, ["S3"])
 
 
 # ----------------------------------------------------------------------------------------------------------------------
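
Taken together, the hunks above replace the old boolean arrowNativeFs switch with a three-way runtimeFs property ("auto", "arrow", "boto3") and only register the provider when the AWS SDK is importable. The snippet below paraphrases the new selection logic in plain Python; the property keys mirror the constants added in this diff, while the values and the helper function are illustrative only:

# Illustrative only: property keys mirror the constants added in this diff
properties = {
    "bucket": "example-bucket",   # hypothetical values
    "region": "eu-west-2",
    "runtimeFs": "auto",          # "auto" (default) | "arrow" | "boto3"
}

def select_runtime_fs(props: dict, arrow_s3_available: bool) -> str:
    """Paraphrase of the has_arrow_native() / has_file_storage() logic above."""
    choice = props.get("runtimeFs") or "auto"
    if choice in ("arrow", "boto3"):
        return choice                     # an explicit choice always wins
    # "auto": prefer the Arrow-native S3 filesystem when pyarrow provides one,
    # otherwise fall back to the boto3-based IFileStorage implementation
    return "arrow" if arrow_s3_available else "boto3"

print(select_runtime_fs(properties, arrow_s3_available=True))    # -> arrow
print(select_runtime_fs(properties, arrow_s3_available=False))   # -> boto3
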
@@ -172,196 +190,194 @@ plugins.PluginManager.register_plugin(IStorageProvider, AwsStorageProvider, ["S3"])
 # It is likely to be removed in a future release
 
 
-if boto_available:
+class S3ObjectStorage(IFileStorage):
 
-    class S3ObjectStorage(IFileStorage):
+    # This is a quick implementation of IFileStorage on S3 using the boto3 AWS SDK
 
-        # This is a quick implementation of IFileStorage on S3 using the boto3 AWS SDK
+    def __init__(self, config: cfg.PluginConfig, client_args: dict):
 
-        def __init__(self, config: cfg.PluginConfig, client_args: dict):
-
-            self._log = _helpers.logger_for_object(self)
+        self._log = _helpers.logger_for_object(self)
 
-            self._properties = config.properties
-            self._bucket = _helpers.get_plugin_property(self._properties, AwsStorageProvider.BUCKET_PROPERTY)
-            self._prefix = _helpers.get_plugin_property(self._properties, AwsStorageProvider.PREFIX_PROPERTY) or ""
+        self._properties = config.properties
+        self._bucket = _helpers.get_plugin_property(self._properties, AwsStorageProvider.BUCKET_PROPERTY)
+        self._prefix = _helpers.get_plugin_property(self._properties, AwsStorageProvider.PREFIX_PROPERTY) or ""
 
-            if self._bucket is None or len(self._bucket.strip()) == 0:
-                message = f"Missing required config property [{AwsStorageProvider.BUCKET_PROPERTY}] for S3 storage"
-                self._log.error(message)
-                raise ex.EConfigParse(message)
+        if self._bucket is None or len(self._bucket.strip()) == 0:
+            message = f"Missing required config property [{AwsStorageProvider.BUCKET_PROPERTY}] for S3 storage"
+            self._log.error(message)
+            raise ex.EConfigParse(message)
 
-            self._client = boto3.client(**client_args)
+        self._client = boto3.client(**client_args)
 
-        def exists(self, storage_path: str) -> bool:
+    def exists(self, storage_path: str) -> bool:
 
-            try:
-                self._log.info(f"EXISTS [{storage_path}]")
+        try:
+            self._log.info(f"EXISTS [{storage_path}]")
 
-                object_key = self._resolve_path(storage_path)
-                self._client.head_object(Bucket=self._bucket, Key=object_key)
-                return True
+            object_key = self._resolve_path(storage_path)
+            self._client.head_object(Bucket=self._bucket, Key=object_key)
+            return True
 
-            except aws_ex.ClientError as error:
-                aws_code = error.response['Error']['Code']
-                if aws_code == str(http.HTTPStatus.NOT_FOUND.value):  # noqa
-                    return False
-                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+        except aws_ex.ClientError as error:
+            aws_code = error.response['Error']['Code']
+            if aws_code == str(http.HTTPStatus.NOT_FOUND.value):  # noqa
+                return False
+            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
-        def size(self, storage_path: str) -> int:
+    def size(self, storage_path: str) -> int:
 
-            try:
-                self._log.info(f"SIZE [{storage_path}]")
+        try:
+            self._log.info(f"SIZE [{storage_path}]")
 
-                object_key = self._resolve_path(storage_path)
-                response = self._client.head_object(Bucket=self._bucket, Key=object_key)
-                return response['ContentLength']
+            object_key = self._resolve_path(storage_path)
+            response = self._client.head_object(Bucket=self._bucket, Key=object_key)
+            return response['ContentLength']
 
-            except aws_ex.ClientError as error:
-                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+        except aws_ex.ClientError as error:
+            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
-        def stat(self, storage_path: str) -> FileStat:
+    def stat(self, storage_path: str) -> FileStat:
 
-            self._log.info(f"STAT [{storage_path}]")
+        self._log.info(f"STAT [{storage_path}]")
 
-            name = storage_path.split("/")[-1]
+        name = storage_path.split("/")[-1]
 
-            if self.exists(storage_path):
+        if self.exists(storage_path):
 
-                # Only OBJECTS can support stat atm
-                # Handling for directories needs to be changed, as part of refactor onto object storage
-                size = self.size(storage_path)
-                return FileStat(name, FileType.FILE, storage_path, size)
+            # Only OBJECTS can support stat atm
+            # Handling for directories needs to be changed, as part of refactor onto object storage
+            size = self.size(storage_path)
+            return FileStat(name, FileType.FILE, storage_path, size)
 
-            else:
+        else:
 
-                self.ls(storage_path)
-                return FileStat(name, FileType.DIRECTORY, storage_path, 0)
+            self.ls(storage_path)
+            return FileStat(name, FileType.DIRECTORY, storage_path, 0)
 
-        def ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:
+    def ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:
 
-            self._log.info(f"LS [{storage_path}]")
+        self._log.info(f"LS [{storage_path}]")
 
-            prefix = self._resolve_path(storage_path) + "/"
+        prefix = self._resolve_path(storage_path) + "/"
 
-            response = self._client.list_objects_v2(
-                Bucket=self._bucket,
-                Prefix=prefix,
-                Delimiter="/")
+        response = self._client.list_objects_v2(
+            Bucket=self._bucket,
+            Prefix=prefix,
+            Delimiter="/")
 
-            keys = []
+        keys = []
 
-            if "Contents" not in response and "CommonPrefixes" not in response:
-                raise ex.EStorageRequest(f"Storage prefix not found: [{storage_path}]")
+        if "Contents" not in response and "CommonPrefixes" not in response:
+            raise ex.EStorageRequest(f"Storage prefix not found: [{storage_path}]")
 
-            if "Contents" in response:
-                for entry in response["Contents"]:
-                    raw_key = entry["Key"]
-                    if raw_key == prefix:
-                        continue
-                    key = raw_key.replace(prefix, "")
-                    size = entry["Size"]
-                    mtime = entry["LastModified "]
-                    stat = FileStat(key, FileType.FILE, raw_key, size, mtime=mtime)
-                    keys.append(stat)
+        if "Contents" in response:
+            for entry in response["Contents"]:
+                raw_key = entry["Key"]
+                if raw_key == prefix:
+                    continue
+                key = raw_key.replace(prefix, "")
+                size = entry["Size"]
+                mtime = entry["LastModified "]
+                stat = FileStat(key, FileType.FILE, raw_key, size, mtime=mtime)
+                keys.append(stat)
 
-            if "CommonPrefixes" in response:
-                for raw_prefix in response["CommonPrefixes"]:
-                    common_prefix = raw_prefix.replace(prefix, "")
-                    stat = FileStat(common_prefix, FileType.DIRECTORY, raw_prefix, 0)
-                    keys.append(stat)
+        if "CommonPrefixes" in response:
+            for raw_prefix in response["CommonPrefixes"]:
+                common_prefix = raw_prefix.replace(prefix, "")
+                stat = FileStat(common_prefix, FileType.DIRECTORY, raw_prefix, 0)
+                keys.append(stat)
 
-            return keys
+        return keys
 
-        def mkdir(self, storage_path: str, recursive: bool = False):
+    def mkdir(self, storage_path: str, recursive: bool = False):
 
-            self._log.info(f"MKDIR [{storage_path}]")
+        self._log.info(f"MKDIR [{storage_path}]")
 
-            # No-op in object storage
-            pass
+        # No-op in object storage
+        pass
 
-        def rm(self, storage_path: str):
+    def rm(self, storage_path: str):
 
-            try:
-                self._log.info(f"RM [{storage_path}]")
+        try:
+            self._log.info(f"RM [{storage_path}]")
 
-                object_key = self._resolve_path(storage_path)
-                self._client.delete_object(Bucket=self._bucket, Key=object_key)
+            object_key = self._resolve_path(storage_path)
+            self._client.delete_object(Bucket=self._bucket, Key=object_key)
 
-            except aws_ex.ClientError as error:
-                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+        except aws_ex.ClientError as error:
+            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
-        def rmdir(self, storage_path: str):
+    def rmdir(self, storage_path: str):
 
-            raise RuntimeError("RMDIR (recursive) not available for S3 storage")
+        raise RuntimeError("RMDIR (recursive) not available for S3 storage")
 
-        def read_bytes(self, storage_path: str) -> bytes:
+    def read_bytes(self, storage_path: str) -> bytes:
 
-            self._log.info(f"READ BYTES [{storage_path}]")
+        self._log.info(f"READ BYTES [{storage_path}]")
 
-            body = self._read_impl(storage_path)
-            return body.read()
+        body = self._read_impl(storage_path)
+        return body.read()
 
-        def read_byte_stream(self, storage_path: str) -> tp.BinaryIO:
+    def read_byte_stream(self, storage_path: str) -> tp.BinaryIO:
 
-            self._log.info(f"READ BYTE STREAM [{storage_path}]")
+        self._log.info(f"READ BYTE STREAM [{storage_path}]")
 
-            data = self.read_bytes(storage_path)
-            return io.BytesIO(data)
+        data = self.read_bytes(storage_path)
+        return io.BytesIO(data)
 
-        def _read_impl(self, storage_path: str) -> botocore.response.StreamingBody:
+    def _read_impl(self, storage_path: str):
 
-            try:
+        try:
 
-                object_key = self._resolve_path(storage_path)
-                response = self._client.get_object(Bucket=self._bucket, Key=object_key)
-                return response['Body']
+            object_key = self._resolve_path(storage_path)
+            response = self._client.get_object(Bucket=self._bucket, Key=object_key)
+            return response['Body']
 
-            except aws_ex.ClientError as error:
-                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+        except aws_ex.ClientError as error:
+            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
-        def write_bytes(self, storage_path: str, data: bytes):
+    def write_bytes(self, storage_path: str, data: bytes):
 
-            try:
-                self._log.info(f"WRITE BYTES [{storage_path}]")
+        try:
+            self._log.info(f"WRITE BYTES [{storage_path}]")
 
-                object_key = self._resolve_path(storage_path)
+            object_key = self._resolve_path(storage_path)
 
-                self._client.put_object(
-                    Bucket=self._bucket,
-                    Key=object_key,
-                    Body=data)
+            self._client.put_object(
+                Bucket=self._bucket,
+                Key=object_key,
+                Body=data)
 
-            except aws_ex.ClientError as error:
-                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+        except aws_ex.ClientError as error:
+            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
-        def write_byte_stream(self, storage_path: str) -> tp.BinaryIO:
+    def write_byte_stream(self, storage_path: str) -> tp.BinaryIO:
 
-            self._log.info(f"WRITE BYTE STREAM [{storage_path}]")
+        self._log.info(f"WRITE BYTE STREAM [{storage_path}]")
 
-            return self._AwsWriteBuf(self, storage_path)
+        return self._AwsWriteBuf(self, storage_path)
 
-        class _AwsWriteBuf(io.BytesIO):
+    class _AwsWriteBuf(io.BytesIO):
 
-            def __init__(self, storage, storage_path):
-                super().__init__()
-                self._storage = storage
-                self._storage_path = storage_path
-                self._written = False
+        def __init__(self, storage, storage_path):
+            super().__init__()
+            self._storage = storage
+            self._storage_path = storage_path
+            self._written = False
 
-            def close(self):
-                if not self._written:
-                    self.seek(0)
-                    data = self.read()
-                    self._storage.write_bytes(self._storage_path, data)
-                    self._written = True
+        def close(self):
+            if not self._written:
+                self.seek(0)
+                data = self.read()
+                self._storage.write_bytes(self._storage_path, data)
+                self._written = True
 
-        def _resolve_path(self, storage_path: str) -> str:
+    def _resolve_path(self, storage_path: str) -> str:
 
-            if self._prefix is None or self._prefix.strip() == "":
-                return storage_path
+        if self._prefix is None or self._prefix.strip() == "":
+            return storage_path
 
-            separator = "" if self._prefix.endswith("/") else "/"
-            full_path = self._prefix + separator + storage_path
+        separator = "" if self._prefix.endswith("/") else "/"
+        full_path = self._prefix + separator + storage_path
 
-            return full_path[1:] if full_path.startswith("/") else full_path
+        return full_path[1:] if full_path.startswith("/") else full_path
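
Most of the hunk above is a pure de-indent: S3ObjectStorage moves out of the old "if boto_available:" block, since availability is now handled by the module-level __aws_available flag and the guarded register_plugin call. One detail it carries over is the _AwsWriteBuf helper, which buffers writes in memory and uploads the whole object when the stream is closed. A stripped-down sketch of that pattern, independent of boto3 (names here are illustrative):

import io

class WriteOnCloseBuffer(io.BytesIO):
    """Generic sketch of the _AwsWriteBuf pattern: buffer in memory, flush once on close."""

    def __init__(self, flush_fn):
        super().__init__()
        self._flush_fn = flush_fn     # called once with the full payload
        self._written = False

    def close(self):
        if not self._written:
            self._flush_fn(self.getvalue())   # hand the buffered bytes to the sink
            self._written = True
        super().close()

# Usage: collects everything written, then "uploads" it when the stream is closed
received = []
buf = WriteOnCloseBuffer(received.append)
buf.write(b"hello ")
buf.write(b"world")
buf.close()
assert received == [b"hello world"]
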
tracdap/rt/_plugins/storage_azure.py (new file)

@@ -0,0 +1,155 @@
+# Copyright 2023 Accenture Global Solutions Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import typing as tp
+
+# TRAC interfaces
+import tracdap.rt.exceptions as ex
+import tracdap.rt.ext.plugins as plugins
+from tracdap.rt.ext.storage import *
+
+import pyarrow.fs as afs
+
+try:
+    # These dependencies are provided by the optional [azure] feature
+    # For local development, pip install -r requirements_plugins.txt
+    import azure.storage.blob as az_blob  # noqa
+    import adlfs  # noqa
+    __azure_available = True
+except ImportError:
+    adlfs = None
+    __azure_available = False
+
+# Set of common helpers across the core plugins (do not reference rt._impl)
+from . import _helpers
+
+
+class AzureBlobStorageProvider(IStorageProvider):
+
+    # This client depends on the Azure fsspec implementation, since there is no native implementation from Arrow
+    # To enable it, the tracdap package must be installed with the optional [azure] feature
+
+    # Current supported authentication mechanisms are "default" and "access_key"
+    # Client always uses location mode = primary, version aware = False
+
+    STORAGE_ACCOUNT_PROPERTY = "storageAccount"
+    CONTAINER_PROPERTY = "container"
+    PREFIX_PROPERTY = "prefix"
+
+    CREDENTIALS_PROPERTY = "credentials"
+    CREDENTIALS_DEFAULT = "default"
+    CREDENTIALS_ACCESS_KEY = "access_key"
+
+    ACCESS_KEY_PROPERTY = "accessKey"
+
+    RUNTIME_FS_PROPERTY = "runtimeFs"
+    RUNTIME_FS_AUTO = "auto"
+    RUNTIME_FS_FSSPEC = "fsspec"
+    RUNTIME_FS_DEFAULT = RUNTIME_FS_AUTO
+
+    def __init__(self, properties: tp.Dict[str, str]):
+
+        self._log = _helpers.logger_for_object(self)
+        self._properties = properties
+
+        self._runtime_fs = _helpers.get_plugin_property(
+            properties, self.RUNTIME_FS_PROPERTY) \
+            or self.RUNTIME_FS_DEFAULT
+
+        # The Azure SDK is very verbose with logging
+        # Avoid log noise by raising the log level for the Azure namespace
+        azure_log = _helpers.logger_for_namespace("azure.core")
+        azure_log.level = logging.WARNING
+
+    def has_arrow_native(self) -> bool:
+        return True
+
+    def get_arrow_native(self) -> afs.SubTreeFileSystem:
+
+        if self._runtime_fs == self.RUNTIME_FS_AUTO or self._runtime_fs == self.RUNTIME_FS_FSSPEC:
+            azure_fs = self.create_fsspec()
+        else:
+            message = f"Requested runtime FS [{self._runtime_fs}] is not available for Azure storage"
+            self._log.error(message)
+            raise ex.EStartup(message)
+
+        container = _helpers.get_plugin_property(self._properties, self.CONTAINER_PROPERTY)
+        prefix = _helpers.get_plugin_property(self._properties, self.PREFIX_PROPERTY)
+
+        if container is None or container.strip() == "":
+            message = f"Missing required config property [{self.CONTAINER_PROPERTY}] for Azure blob storage"
+            self._log.error(message)
+            raise ex.EConfigParse(message)
+
+        root_path = f"{container}/{prefix}" if prefix else container
+
+        return afs.SubTreeFileSystem(root_path, azure_fs)
+
+    def create_fsspec(self) -> afs.FileSystem:
+
+        azure_fsspec_args = self.setup_client_args()
+        azure_fsspec = adlfs.AzureBlobFileSystem(**azure_fsspec_args)
+
+        return afs.PyFileSystem(afs.FSSpecHandler(azure_fsspec))
+
+    def setup_client_args(self) -> tp.Dict[str, tp.Any]:
+
+        client_args = dict()
+
+        storage_account = _helpers.get_plugin_property(self._properties, self.STORAGE_ACCOUNT_PROPERTY)
+
+        if storage_account is None or len(storage_account.strip()) == 0:
+            message = f"Missing required config property [{self.STORAGE_ACCOUNT_PROPERTY}] for Azure blob storage"
+            self._log.error(message)
+            raise ex.EConfigParse(message)
+
+        client_args["account_name"] = storage_account
+
+        credentials = self.setup_credentials()
+        client_args.update(credentials)
+
+        return client_args
+
+    def setup_credentials(self):
+
+        # Only default (Google ADC) mechanism is supported
+        # Arrow GCP FS does also support access tokens, but ADC is probably all we ever need
+
+        mechanism = _helpers.get_plugin_property(self._properties, self.CREDENTIALS_PROPERTY)
+
+        if mechanism is None or len(mechanism) == 0 or mechanism.lower() == self.CREDENTIALS_DEFAULT:
+            self._log.info(f"Using [{self.CREDENTIALS_DEFAULT}] credentials mechanism")
+            return {"anon": False}
+
+        if mechanism == self.CREDENTIALS_ACCESS_KEY:
+
+            self._log.info(f"Using [{self.CREDENTIALS_ACCESS_KEY}] credentials mechanism")
+
+            access_key = _helpers.get_plugin_property(self._properties, self.ACCESS_KEY_PROPERTY)
+
+            if access_key is None or len(access_key.strip()) == 0:
+                message = f"Missing required config property [{self.ACCESS_KEY_PROPERTY}] for Azure blob storage"
+                raise ex.EConfigParse(message)
+
+            return {"account_key": access_key}
+
+        message = f"Unrecognised credentials mechanism: [{mechanism}]"
+        self._log.error(message)
+        raise ex.EStartup(message)
+
+
+# Only register the plugin if the [azure] feature is available
+if __azure_available:
+    plugins.PluginManager.register_plugin(IStorageProvider, AzureBlobStorageProvider, ["BLOB"])
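
The new Azure provider has no Arrow-native backend to fall back on, so it always bridges adlfs (fsspec) into pyarrow.fs through PyFileSystem and FSSpecHandler. Note that the comments inside setup_credentials() still mention Google ADC and the Arrow GCP FS, which looks carried over from the GCP plugin; the mechanisms actually handled are "default" and "access_key". Below is a hedged usage sketch: the property keys come from the constants in the new file, but the values are hypothetical, constructing the provider directly (rather than through TRAC config and plugin loading) is for illustration only, and the optional [azure] dependencies must be installed for the import to succeed.

# Illustrative sketch only: values are hypothetical, and this constructs the provider
# directly instead of going through TRAC config and plugin loading
from tracdap.rt._plugins.storage_azure import AzureBlobStorageProvider

properties = {
    "storageAccount": "examplestorageacct",   # required
    "container": "example-container",         # required
    "prefix": "data/landing",                 # optional sub-path inside the container
    "credentials": "default",                 # or "access_key" plus "accessKey": "..."
}

provider = AzureBlobStorageProvider(properties)

if provider.has_arrow_native():
    fs = provider.get_arrow_native()          # pyarrow.fs.SubTreeFileSystem over adlfs
    print(fs.get_file_info("some/object.csv"))
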