tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. tracdap/rt/_impl/core/config_parser.py +29 -3
  2. tracdap/rt/_impl/core/data.py +627 -40
  3. tracdap/rt/_impl/core/repos.py +17 -8
  4. tracdap/rt/_impl/core/storage.py +25 -13
  5. tracdap/rt/_impl/core/struct.py +254 -60
  6. tracdap/rt/_impl/core/util.py +125 -11
  7. tracdap/rt/_impl/exec/context.py +35 -8
  8. tracdap/rt/_impl/exec/dev_mode.py +169 -127
  9. tracdap/rt/_impl/exec/engine.py +203 -140
  10. tracdap/rt/_impl/exec/functions.py +228 -263
  11. tracdap/rt/_impl/exec/graph.py +141 -126
  12. tracdap/rt/_impl/exec/graph_builder.py +428 -449
  13. tracdap/rt/_impl/grpc/codec.py +8 -13
  14. tracdap/rt/_impl/grpc/server.py +7 -7
  15. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
  16. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
  17. tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
  18. tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
  19. tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
  20. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +37 -35
  21. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
  22. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
  23. tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
  24. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
  25. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
  26. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
  27. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
  28. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
  29. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
  30. tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
  31. tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
  32. tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
  33. tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
  34. tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
  35. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +23 -19
  36. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
  37. tracdap/rt/_impl/runtime.py +3 -9
  38. tracdap/rt/_impl/static_api.py +5 -6
  39. tracdap/rt/_plugins/format_csv.py +2 -2
  40. tracdap/rt/_plugins/repo_git.py +56 -11
  41. tracdap/rt/_plugins/storage_aws.py +165 -150
  42. tracdap/rt/_plugins/storage_azure.py +17 -11
  43. tracdap/rt/_plugins/storage_gcp.py +35 -18
  44. tracdap/rt/_version.py +1 -1
  45. tracdap/rt/api/model_api.py +45 -0
  46. tracdap/rt/config/__init__.py +7 -9
  47. tracdap/rt/config/common.py +3 -14
  48. tracdap/rt/config/job.py +17 -3
  49. tracdap/rt/config/platform.py +9 -32
  50. tracdap/rt/config/result.py +8 -4
  51. tracdap/rt/config/runtime.py +5 -10
  52. tracdap/rt/config/tenant.py +28 -0
  53. tracdap/rt/launch/cli.py +0 -8
  54. tracdap/rt/launch/launch.py +1 -3
  55. tracdap/rt/metadata/__init__.py +35 -35
  56. tracdap/rt/metadata/data.py +19 -31
  57. tracdap/rt/metadata/job.py +3 -1
  58. tracdap/rt/metadata/storage.py +9 -0
  59. tracdap/rt/metadata/type.py +9 -5
  60. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +5 -3
  61. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +64 -63
  62. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
  63. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
  64. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
tracdap/rt/_plugins/repo_git.py
@@ -23,6 +23,7 @@ import time
 import dulwich.repo as git_repo
 import dulwich.client as git_client
 import dulwich.index as git_index
+import urllib3.exceptions  # noqa

 import tracdap.rt.metadata as meta
 import tracdap.rt.exceptions as ex
@@ -75,20 +76,45 @@ class GitRepository(IModelRepository):

     def do_checkout(self, model_def: meta.ModelDefinition, checkout_dir: pathlib.Path) -> pathlib.Path:

-        self._log.info(
-            f"Git checkout: repo = [{model_def.repository}], " +
-            f"group = [{model_def.packageGroup}], package = [{model_def.package}], version = [{model_def.version}]")
+        try:

-        self._log.info(f"Checkout location: [{checkout_dir}]")
+            self._log.info(
+                f"Git checkout: repo = [{model_def.repository}], " +
+                f"group = [{model_def.packageGroup}], package = [{model_def.package}], version = [{model_def.version}]")

-        if self._native_git:
-            package_path = self._do_native_checkout(model_def, checkout_dir)
-        else:
-            package_path = self._do_python_checkout(model_def, checkout_dir)
+            self._log.info(f"Checkout location: [{checkout_dir}]")
+
+            if self._native_git:
+                package_path = self._do_native_checkout(model_def, checkout_dir)
+            else:
+                package_path = self._do_python_checkout(model_def, checkout_dir)
+
+            self._log.info(f"Git checkout succeeded for {model_def.package} {model_def.version}")
+
+            return package_path
+
+        except Exception as e:
+
+            error = e
+
+            # For retry failures, try to find the original cause
+            while e.__cause__ is not None:
+                if isinstance(e, urllib3.exceptions.MaxRetryError):
+                    error = e.__cause__
+                    break
+                else:
+                    e = e.__cause__
+
+            # Try to sanitize error messages from urllib3
+            if isinstance(error, urllib3.exceptions.HTTPError):
+                detail = self._clean_urllib3_error(error)
+            else:
+                detail = str(error)

-        self._log.info(f"Git checkout succeeded for {model_def.package} {model_def.version}")
+            message = f"Failed to check out [{model_def.repository}]: {detail}"

-        return package_path
+            self._log.error(message)
+            raise ex.EModelRepo(message) from error

     def _do_native_checkout(self, model_def: meta.ModelDefinition, checkout_dir: pathlib.Path) -> pathlib.Path:

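The new except handler walks the exception cause chain before reporting. Read in isolation, the unwrapping logic looks like this (a standalone sketch; the helper name is illustrative, the real code is inline in do_checkout):

import urllib3.exceptions

def find_reportable_error(e: BaseException) -> BaseException:
    # Walk the __cause__ chain; when a urllib3 MaxRetryError is found,
    # report its underlying cause (the original connection failure) instead
    error = e
    while e.__cause__ is not None:
        if isinstance(e, urllib3.exceptions.MaxRetryError):
            error = e.__cause__
            break
        e = e.__cause__
    return error

If no MaxRetryError appears anywhere in the chain, the original exception is reported unchanged.
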
@@ -150,10 +176,15 @@
             for line in cmd_err:
                 self._log.info(line)

-        else:
+        elif cmd_err:
+
             for line in cmd_err:
                 self._log.error(line)

+            raise ex.EModelRepo(cmd_err[-1])
+
+        else:
+
             error_msg = f"Git checkout failed for {model_def.package} {model_def.version}"
             self._log.error(error_msg)
             raise ex.EModelRepo(error_msg)
@@ -265,6 +296,20 @@ class GitRepository(IModelRepository):
     def _ref_key(key):
         return bytes(key, "ascii")

+    @classmethod
+    def _clean_urllib3_error(cls, error: urllib3.exceptions.HTTPError):
+
+        match = cls._URLLIB3_ERROR_PATTERN.match(str(error))
+
+        # Best efforts to clean up the message, fall back on str(error)
+        if match:
+            return match.group(1)
+        else:
+            return str(error)
+
+    # Error message format is like this:
+    # <pkg.ClassName object at 0xXXXXXXX>: Message
+    _URLLIB3_ERROR_PATTERN = re.compile(r"<[^>]*>: (.*)")

 # Register plugin
 plugins.PluginManager.register_plugin(IModelRepository, GitRepository, ["git"])
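To illustrate _URLLIB3_ERROR_PATTERN, here is the cleanup applied to a made-up urllib3 error string (the message text below is hypothetical):

import re

pattern = re.compile(r"<[^>]*>: (.*)")

# Matches the "<pkg.ClassName object at 0x...>: Message" format
raw = "<urllib3.connection.HTTPSConnection object at 0x7f9c2c1d0e80>: Failed to establish a new connection"
match = pattern.match(raw)
print(match.group(1) if match else raw)
# Prints: Failed to establish a new connection
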
tracdap/rt/_plugins/storage_aws.py
@@ -30,17 +30,24 @@ from pyarrow import fs as afs
 from . import _helpers


-try:
-    # AWS SDK
-    import boto3
-    import botocore.response
-    import botocore.exceptions as aws_ex
-    __aws_available = True
-except ImportError:
-    boto3 = None
-    botocore = None
-    aws_ex = None
-    __aws_available = False
+def _aws_arrow_available():
+    try:
+        # Shipped as part of PyArrow, but may not be available on all platforms
+        return afs.S3FileSystem is not None
+    except ImportError:
+        return False
+
+def _aws_boto3_available():
+    try:
+        # AWS SDK
+        # These dependencies are provided by the optional [aws] feature
+        # For local development, pip install -r requirements_plugins.txt
+        import boto3  # noqa
+        import botocore.response  # noqa
+        import botocore.exceptions as aws_ex  # noqa
+        return True
+    except ImportError:
+        return False


 class AwsStorageProvider(IStorageProvider):
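The try/except probes above generalize to any optional extra; a minimal sketch (this helper is hypothetical, not part of the plugin):

def optional_feature_available(*modules: str) -> bool:
    # A feature is usable only if every module it needs can be imported
    try:
        for name in modules:
            __import__(name)
        return True
    except ImportError:
        return False

# e.g. optional_feature_available("boto3", "botocore.response", "botocore.exceptions")

Probing at call time, rather than caching a module-level flag at import, lets the provider raise a clear EStorage error when a missing backend is actually requested.
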
@@ -87,23 +94,22 @@ class AwsStorageProvider(IStorageProvider):
             or self.RUNTIME_FS_DEFAULT

     def has_arrow_native(self) -> bool:
-        if self._runtime_fs == self.RUNTIME_FS_ARROW:
-            return True
-        elif self._runtime_fs == self.RUNTIME_FS_AUTO:
-            return afs.S3FileSystem is not None
-        else:
-            return False
+
+        return _aws_arrow_available()

     def has_file_storage(self) -> bool:
-        if self._runtime_fs == self.RUNTIME_FS_BOTO3:
-            return True
-        elif self._runtime_fs == self.RUNTIME_FS_AUTO:
-            return afs.S3FileSystem is None
-        else:
+
+        # Do not advertise the custom storage implementation if arrow native is available
+        if _aws_arrow_available():
             return False

+        return _aws_boto3_available()
+
     def get_arrow_native(self) -> afs.SubTreeFileSystem:

+        if not _aws_arrow_available():
+            raise ex.EStorage(f"S3 storage setup failed: Plugin for [{self.RUNTIME_FS_ARROW}] is not available")
+
         s3fs_args = self.setup_client_args(self.ARROW_CLIENT_ARGS)
         s3fs = afs.S3FileSystem(**s3fs_args)

@@ -121,6 +127,9 @@

     def get_file_storage(self) -> IFileStorage:

+        if not _aws_boto3_available():
+            raise ex.EStorage(f"S3 storage setup failed: Plugin for [{self.RUNTIME_FS_BOTO3}] is not available")
+
         client_args = self.setup_client_args(self.BOTO_CLIENT_ARGS)
         client_args["service_name"] = "s3"

@@ -179,7 +188,7 @@ class AwsStorageProvider(IStorageProvider):
             raise ex.EStartup(message)


-if __aws_available:
+if _aws_arrow_available() or _aws_boto3_available():
     plugins.PluginManager.register_plugin(IStorageProvider, AwsStorageProvider, ["S3"])

@@ -190,195 +199,201 @@ if __aws_available:
 # This is the old implementation that was used before Arrow native was made available
 # It is likely to be removed in a future release

+if _aws_boto3_available():

-class S3ObjectStorage(IFileStorage):
+    # These dependencies are provided by the optional [aws] feature
+    # For local development, pip install -r requirements_plugins.txt
+    import boto3  # noqa
+    import botocore.exceptions as aws_ex  # noqa

-    # This is a quick implementation of IFileStorage on S3 using the boto3 AWS SDK
+    class S3ObjectStorage(IFileStorage):

-    def __init__(self, config: cfg.PluginConfig, client_args: dict):
+        # This is a quick implementation of IFileStorage on S3 using the boto3 AWS SDK

-        self._log = _helpers.logger_for_object(self)
+        def __init__(self, config: cfg.PluginConfig, client_args: dict):

-        self._properties = config.properties
-        self._bucket = _helpers.get_plugin_property(self._properties, AwsStorageProvider.BUCKET_PROPERTY)
-        self._prefix = _helpers.get_plugin_property(self._properties, AwsStorageProvider.PREFIX_PROPERTY) or ""
+            self._log = _helpers.logger_for_object(self)

-        if self._bucket is None or len(self._bucket.strip()) == 0:
-            message = f"Missing required config property [{AwsStorageProvider.BUCKET_PROPERTY}] for S3 storage"
-            self._log.error(message)
-            raise ex.EConfigParse(message)
+            self._properties = config.properties
+            self._bucket = _helpers.get_plugin_property(self._properties, AwsStorageProvider.BUCKET_PROPERTY)
+            self._prefix = _helpers.get_plugin_property(self._properties, AwsStorageProvider.PREFIX_PROPERTY) or ""

-        self._client = boto3.client(**client_args)
+            if self._bucket is None or len(self._bucket.strip()) == 0:
+                message = f"Missing required config property [{AwsStorageProvider.BUCKET_PROPERTY}] for S3 storage"
+                self._log.error(message)
+                raise ex.EConfigParse(message)

-    def exists(self, storage_path: str) -> bool:
+            self._client = boto3.client(**client_args)

-        try:
-            self._log.info(f"EXISTS [{storage_path}]")
+        def exists(self, storage_path: str) -> bool:

-            object_key = self._resolve_path(storage_path)
-            self._client.head_object(Bucket=self._bucket, Key=object_key)
-            return True
+            try:
+                self._log.info(f"EXISTS [{storage_path}]")

-        except aws_ex.ClientError as error:
-            aws_code = error.response['Error']['Code']
-            if aws_code == str(http.HTTPStatus.NOT_FOUND.value):  # noqa
-                return False
-            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+                object_key = self._resolve_path(storage_path)
+                self._client.head_object(Bucket=self._bucket, Key=object_key)
+                return True

-    def size(self, storage_path: str) -> int:
+            except aws_ex.ClientError as error:
+                aws_code = error.response['Error']['Code']
+                if aws_code == str(http.HTTPStatus.NOT_FOUND.value):  # noqa
+                    return False
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error

-        try:
-            self._log.info(f"SIZE [{storage_path}]")
+        def size(self, storage_path: str) -> int:

-            object_key = self._resolve_path(storage_path)
-            response = self._client.head_object(Bucket=self._bucket, Key=object_key)
-            return response['ContentLength']
+            try:
+                self._log.info(f"SIZE [{storage_path}]")

-        except aws_ex.ClientError as error:
-            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+                object_key = self._resolve_path(storage_path)
+                response = self._client.head_object(Bucket=self._bucket, Key=object_key)
+                return response['ContentLength']

-    def stat(self, storage_path: str) -> FileStat:
+            except aws_ex.ClientError as error:
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error

-        self._log.info(f"STAT [{storage_path}]")
+        def stat(self, storage_path: str) -> FileStat:

-        name = storage_path.split("/")[-1]
+            self._log.info(f"STAT [{storage_path}]")

-        if self.exists(storage_path):
+            name = storage_path.split("/")[-1]

-            # Only OBJECTS can support stat atm
-            # Handling for directories needs to be changed, as part of refactor onto object storage
-            size = self.size(storage_path)
-            return FileStat(name, FileType.FILE, storage_path, size)
+            if self.exists(storage_path):

-        else:
+                # Only OBJECTS can support stat atm
+                # Handling for directories needs to be changed, as part of refactor onto object storage
+                size = self.size(storage_path)
+                return FileStat(name, FileType.FILE, storage_path, size)

-            self.ls(storage_path)
-            return FileStat(name, FileType.DIRECTORY, storage_path, 0)
+            else:

-    def ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:
+                self.ls(storage_path)
+                return FileStat(name, FileType.DIRECTORY, storage_path, 0)

-        self._log.info(f"LS [{storage_path}]")
+        def ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:

-        prefix = self._resolve_path(storage_path) + "/"
+            self._log.info(f"LS [{storage_path}]")

-        response = self._client.list_objects_v2(
-            Bucket=self._bucket,
-            Prefix=prefix,
-            Delimiter="/")
+            prefix = self._resolve_path(storage_path) + "/"

-        keys = []
+            response = self._client.list_objects_v2(
+                Bucket=self._bucket,
+                Prefix=prefix,
+                Delimiter="/")

-        if "Contents" not in response and "CommonPrefixes" not in response:
-            raise ex.EStorageRequest(f"Storage prefix not found: [{storage_path}]")
+            keys = []

-        if "Contents" in response:
-            for entry in response["Contents"]:
-                raw_key = entry["Key"]
-                if raw_key == prefix:
-                    continue
-                key = raw_key.replace(prefix, "")
-                size = entry["Size"]
-                mtime = entry["LastModified"]
-                stat = FileStat(key, FileType.FILE, raw_key, size, mtime=mtime)
-                keys.append(stat)
+            if "Contents" not in response and "CommonPrefixes" not in response:
+                raise ex.EStorageRequest(f"Storage prefix not found: [{storage_path}]")

-        if "CommonPrefixes" in response:
-            for raw_prefix in response["CommonPrefixes"]:
-                common_prefix = raw_prefix.replace(prefix, "")
-                stat = FileStat(common_prefix, FileType.DIRECTORY, raw_prefix, 0)
-                keys.append(stat)
+            if "Contents" in response:
+                for entry in response["Contents"]:
+                    raw_key = entry["Key"]
+                    if raw_key == prefix:
+                        continue
+                    key = raw_key.replace(prefix, "")
+                    size = entry["Size"]
+                    mtime = entry["LastModified"]
+                    stat = FileStat(key, FileType.FILE, raw_key, size, mtime=mtime)
+                    keys.append(stat)

-        return keys
+            if "CommonPrefixes" in response:
+                for raw_prefix in response["CommonPrefixes"]:
+                    common_prefix = raw_prefix.replace(prefix, "")
+                    stat = FileStat(common_prefix, FileType.DIRECTORY, raw_prefix, 0)
+                    keys.append(stat)

-    def mkdir(self, storage_path: str, recursive: bool = False):
+            return keys

-        self._log.info(f"MKDIR [{storage_path}]")
+        def mkdir(self, storage_path: str, recursive: bool = False):

-        # No-op in object storage
-        pass
+            self._log.info(f"MKDIR [{storage_path}]")

-    def rm(self, storage_path: str):
+            # No-op in object storage
+            pass

-        try:
-            self._log.info(f"RM [{storage_path}]")
+        def rm(self, storage_path: str):

-            object_key = self._resolve_path(storage_path)
-            self._client.delete_object(Bucket=self._bucket, Key=object_key)
+            try:
+                self._log.info(f"RM [{storage_path}]")

-        except aws_ex.ClientError as error:
-            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+                object_key = self._resolve_path(storage_path)
+                self._client.delete_object(Bucket=self._bucket, Key=object_key)

-    def rmdir(self, storage_path: str):
+            except aws_ex.ClientError as error:
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error

-        raise RuntimeError("RMDIR (recursive) not available for S3 storage")
+        def rmdir(self, storage_path: str):

-    def read_bytes(self, storage_path: str) -> bytes:
+            raise RuntimeError("RMDIR (recursive) not available for S3 storage")

-        self._log.info(f"READ BYTES [{storage_path}]")
+        def read_bytes(self, storage_path: str) -> bytes:

-        body = self._read_impl(storage_path)
-        return body.read()
+            self._log.info(f"READ BYTES [{storage_path}]")

-    def read_byte_stream(self, storage_path: str) -> tp.BinaryIO:
+            body = self._read_impl(storage_path)
+            return body.read()

-        self._log.info(f"READ BYTE STREAM [{storage_path}]")
+        def read_byte_stream(self, storage_path: str) -> tp.BinaryIO:

-        data = self.read_bytes(storage_path)
-        return io.BytesIO(data)
+            self._log.info(f"READ BYTE STREAM [{storage_path}]")

-    def _read_impl(self, storage_path: str):
+            data = self.read_bytes(storage_path)
+            return io.BytesIO(data)

-        try:
+        def _read_impl(self, storage_path: str):

-            object_key = self._resolve_path(storage_path)
-            response = self._client.get_object(Bucket=self._bucket, Key=object_key)
-            return response['Body']
+            try:

-        except aws_ex.ClientError as error:
-            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+                object_key = self._resolve_path(storage_path)
+                response = self._client.get_object(Bucket=self._bucket, Key=object_key)
+                return response['Body']

-    def write_bytes(self, storage_path: str, data: bytes):
+            except aws_ex.ClientError as error:
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error

-        try:
-            self._log.info(f"WRITE BYTES [{storage_path}]")
+        def write_bytes(self, storage_path: str, data: bytes):

-            object_key = self._resolve_path(storage_path)
+            try:
+                self._log.info(f"WRITE BYTES [{storage_path}]")

-            self._client.put_object(
-                Bucket=self._bucket,
-                Key=object_key,
-                Body=data)
+                object_key = self._resolve_path(storage_path)
+
+                self._client.put_object(
+                    Bucket=self._bucket,
+                    Key=object_key,
+                    Body=data)

-        except aws_ex.ClientError as error:
-            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+            except aws_ex.ClientError as error:
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error

-    def write_byte_stream(self, storage_path: str) -> tp.BinaryIO:
+        def write_byte_stream(self, storage_path: str) -> tp.BinaryIO:

-        self._log.info(f"WRITE BYTE STREAM [{storage_path}]")
+            self._log.info(f"WRITE BYTE STREAM [{storage_path}]")

-        return self._AwsWriteBuf(self, storage_path)
+            return self._AwsWriteBuf(self, storage_path)

-    class _AwsWriteBuf(io.BytesIO):
+        class _AwsWriteBuf(io.BytesIO):

-        def __init__(self, storage, storage_path):
-            super().__init__()
-            self._storage = storage
-            self._storage_path = storage_path
-            self._written = False
+            def __init__(self, storage, storage_path):
+                super().__init__()
+                self._storage = storage
+                self._storage_path = storage_path
+                self._written = False

-        def close(self):
-            if not self._written:
-                self.seek(0)
-                data = self.read()
-                self._storage.write_bytes(self._storage_path, data)
-                self._written = True
+            def close(self):
+                if not self._written:
+                    self.seek(0)
+                    data = self.read()
+                    self._storage.write_bytes(self._storage_path, data)
+                    self._written = True

-    def _resolve_path(self, storage_path: str) -> str:
+        def _resolve_path(self, storage_path: str) -> str:

-        if self._prefix is None or self._prefix.strip() == "":
-            return storage_path
+            if self._prefix is None or self._prefix.strip() == "":
+                return storage_path

-        separator = "" if self._prefix.endswith("/") else "/"
-        full_path = self._prefix + separator + storage_path
+            separator = "" if self._prefix.endswith("/") else "/"
+            full_path = self._prefix + separator + storage_path

-        return full_path[1:] if full_path.startswith("/") else full_path
+            return full_path[1:] if full_path.startswith("/") else full_path
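Aside from the availability guard and the re-indentation under it, the class body is unchanged. The _AwsWriteBuf helper at the end implements a write-on-close buffer: writes accumulate in memory and are delivered as a single put when the stream closes. The same pattern in a self-contained sketch (names here are illustrative, with an in-memory sink standing in for the S3 put):

import io

class WriteOnCloseBuf(io.BytesIO):

    def __init__(self, sink):
        super().__init__()
        self._sink = sink          # callable that receives the final bytes
        self._written = False

    def close(self):
        if not self._written:
            self.seek(0)
            self._sink(self.read())   # deliver the whole buffer in one call
            self._written = True
        super().close()

received = []
buf = WriteOnCloseBuf(received.append)
buf.write(b"hello ")
buf.write(b"world")
buf.close()
assert received == [b"hello world"]
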
tracdap/rt/_plugins/storage_azure.py
@@ -23,20 +23,21 @@ from tracdap.rt.ext.storage import *

 import pyarrow.fs as afs

-try:
-    # These dependencies are provided by the optional [azure] feature
-    # For local development, pip install -r requirements_plugins.txt
-    import azure.storage.blob as az_blob  # noqa
-    import adlfs  # noqa
-    __azure_available = True
-except ImportError:
-    adlfs = None
-    __azure_available = False
-
 # Set of common helpers across the core plugins (do not reference rt._impl)
 from . import _helpers


+def _azure_fsspec_available():
+    try:
+        # These dependencies are provided by the optional [azure] feature
+        # For local development, pip install -r requirements_plugins.txt
+        import azure.storage.blob as az_blob  # noqa
+        import adlfs  # noqa
+        return True
+    except ImportError:
+        return False
+
+
 class AzureBlobStorageProvider(IStorageProvider):

     # This client depends on the Azure fsspec implementation, since there is no native implementation from Arrow
@@ -100,6 +101,11 @@ class AzureBlobStorageProvider(IStorageProvider):

     def create_fsspec(self) -> afs.FileSystem:

+        if not _azure_fsspec_available():
+            raise ex.EStorage(f"BLOB storage setup failed: Plugin for [{self.RUNTIME_FS_FSSPEC}] is not available")
+
+        import adlfs  # noqa
+
         azure_fsspec_args = self.setup_client_args()
         azure_fsspec = adlfs.AzureBlobFileSystem(**azure_fsspec_args)

@@ -152,5 +158,5 @@


 # Only register the plugin if the [azure] feature is available
-if __azure_available:
+if _azure_fsspec_available():
     plugins.PluginManager.register_plugin(IStorageProvider, AzureBlobStorageProvider, ["BLOB"])
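Note the deferred import adlfs inside create_fsspec: the module is only imported after the availability check passes, so module load never fails when the [azure] extra is absent. The pattern in isolation (a sketch with a hypothetical function name; AzureBlobFileSystem is the real adlfs entry point used above):

def make_azure_fs(**client_args):
    # Import at call time, once availability has been confirmed,
    # keeping the optional dependency off the module import path
    import adlfs  # noqa
    return adlfs.AzureBlobFileSystem(**client_args)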