tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b2__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- tracdap/rt/_impl/core/config_parser.py +29 -3
- tracdap/rt/_impl/core/data.py +627 -40
- tracdap/rt/_impl/core/repos.py +17 -8
- tracdap/rt/_impl/core/storage.py +25 -13
- tracdap/rt/_impl/core/struct.py +254 -60
- tracdap/rt/_impl/core/util.py +125 -11
- tracdap/rt/_impl/exec/context.py +35 -8
- tracdap/rt/_impl/exec/dev_mode.py +169 -127
- tracdap/rt/_impl/exec/engine.py +203 -140
- tracdap/rt/_impl/exec/functions.py +228 -263
- tracdap/rt/_impl/exec/graph.py +141 -126
- tracdap/rt/_impl/exec/graph_builder.py +428 -449
- tracdap/rt/_impl/grpc/codec.py +8 -13
- tracdap/rt/_impl/grpc/server.py +7 -7
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +37 -35
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +23 -19
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
- tracdap/rt/_impl/runtime.py +3 -9
- tracdap/rt/_impl/static_api.py +5 -6
- tracdap/rt/_plugins/format_csv.py +2 -2
- tracdap/rt/_plugins/repo_git.py +56 -11
- tracdap/rt/_plugins/storage_aws.py +165 -150
- tracdap/rt/_plugins/storage_azure.py +17 -11
- tracdap/rt/_plugins/storage_gcp.py +35 -18
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/model_api.py +45 -0
- tracdap/rt/config/__init__.py +7 -9
- tracdap/rt/config/common.py +3 -14
- tracdap/rt/config/job.py +17 -3
- tracdap/rt/config/platform.py +9 -32
- tracdap/rt/config/result.py +8 -4
- tracdap/rt/config/runtime.py +5 -10
- tracdap/rt/config/tenant.py +28 -0
- tracdap/rt/launch/cli.py +0 -8
- tracdap/rt/launch/launch.py +1 -3
- tracdap/rt/metadata/__init__.py +35 -35
- tracdap/rt/metadata/data.py +19 -31
- tracdap/rt/metadata/job.py +3 -1
- tracdap/rt/metadata/storage.py +9 -0
- tracdap/rt/metadata/type.py +9 -5
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +5 -3
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +64 -63
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
tracdap/rt/_plugins/repo_git.py
CHANGED
@@ -23,6 +23,7 @@ import time
 import dulwich.repo as git_repo
 import dulwich.client as git_client
 import dulwich.index as git_index
+import urllib3.exceptions  # noqa

 import tracdap.rt.metadata as meta
 import tracdap.rt.exceptions as ex
@@ -75,20 +76,45 @@ class GitRepository(IModelRepository):

     def do_checkout(self, model_def: meta.ModelDefinition, checkout_dir: pathlib.Path) -> pathlib.Path:

-        self._log.info(
-            f"Git checkout: repo = [{model_def.repository}], " +
-            f"group = [{model_def.packageGroup}], package = [{model_def.package}], version = [{model_def.version}]")
-
-        self._log.info(f"Checkout location: [{checkout_dir}]")
-
-        if self._native_git:
-            package_path = self._do_native_checkout(model_def, checkout_dir)
-        else:
-            package_path = self._do_python_checkout(model_def, checkout_dir)
-
-        self._log.info(f"Git checkout succeeded for {model_def.package} {model_def.version}")
-
-        return package_path
+        try:
+
+            self._log.info(
+                f"Git checkout: repo = [{model_def.repository}], " +
+                f"group = [{model_def.packageGroup}], package = [{model_def.package}], version = [{model_def.version}]")
+
+            self._log.info(f"Checkout location: [{checkout_dir}]")
+
+            if self._native_git:
+                package_path = self._do_native_checkout(model_def, checkout_dir)
+            else:
+                package_path = self._do_python_checkout(model_def, checkout_dir)
+
+            self._log.info(f"Git checkout succeeded for {model_def.package} {model_def.version}")
+
+            return package_path
+
+        except Exception as e:
+
+            error = e
+
+            # For retry failures, try to find the original cause
+            while e.__cause__ is not None:
+                if isinstance(e, urllib3.exceptions.MaxRetryError):
+                    error = e.__cause__
+                    break
+                else:
+                    e = e.__cause__
+
+            # Try to sanitize error messages from urllib3
+            if isinstance(error, urllib3.exceptions.HTTPError):
+                detail = self._clean_urllib3_error(error)
+            else:
+                detail = str(error)
+
+            message = f"Failed to check out [{model_def.repository}]: {detail}"
+
+            self._log.error(message)
+            raise ex.EModelRepo(message) from error

     def _do_native_checkout(self, model_def: meta.ModelDefinition, checkout_dir: pathlib.Path) -> pathlib.Path:
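The retry-unwrapping logic in the new except block is self-contained enough to lift out. A minimal standalone sketch of the same idea (find_root_cause is a hypothetical name, not part of the package):

    import urllib3.exceptions

    def find_root_cause(e: Exception) -> Exception:
        # Walk the __cause__ chain; when a urllib3 MaxRetryError is found,
        # surface the error that originally triggered the retries.
        # If no MaxRetryError appears, the original exception is kept.
        error = e
        while e.__cause__ is not None:
            if isinstance(e, urllib3.exceptions.MaxRetryError):
                error = e.__cause__
                break
            else:
                e = e.__cause__
        return error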
@@ -150,10 +176,15 @@ class GitRepository(IModelRepository):
             for line in cmd_err:
                 self._log.info(line)

-        else:
+        elif cmd_err:
+
             for line in cmd_err:
                 self._log.error(line)

+            raise ex.EModelRepo(cmd_err[-1])
+
+        else:
+
             error_msg = f"Git checkout failed for {model_def.package} {model_def.version}"
             self._log.error(error_msg)
             raise ex.EModelRepo(error_msg)
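The new elif branch surfaces the last line of stderr as the exception message when native git fails. A rough illustration of the same pattern with subprocess (run_git is a hypothetical helper, not the package's actual code):

    import subprocess

    def run_git(*args: str) -> str:
        proc = subprocess.run(["git", *args], capture_output=True, text=True)
        cmd_err = proc.stderr.splitlines()

        if proc.returncode == 0:
            return proc.stdout
        elif cmd_err:
            # Git usually puts the most specific message on the last stderr line
            raise RuntimeError(cmd_err[-1])
        else:
            raise RuntimeError("Git command failed with no error output")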
@@ -265,6 +296,20 @@ class GitRepository(IModelRepository):
     def _ref_key(key):
         return bytes(key, "ascii")

+    @classmethod
+    def _clean_urllib3_error(cls, error: urllib3.exceptions.HTTPError):
+
+        match = cls._URLLIB3_ERROR_PATTERN.match(str(error))
+
+        # Best efforts to clean up the message, fall back on str(error)
+        if match:
+            return match.group(1)
+        else:
+            return str(error)
+
+    # Error message format is like this:
+    # <pkg.ClassName object at 0xXXXXXXX>: Message
+    _URLLIB3_ERROR_PATTERN = re.compile(r"<[^>]*>: (.*)")

 # Register plugin
 plugins.PluginManager.register_plugin(IModelRepository, GitRepository, ["git"])
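The new _clean_urllib3_error helper relies on the textual form of urllib3 errors described in the comment above. A quick, runnable check of the regex against a representative message (the sample string is illustrative only):

    import re

    pattern = re.compile(r"<[^>]*>: (.*)")  # same pattern as _URLLIB3_ERROR_PATTERN

    raw = "<urllib3.connection.HTTPConnection object at 0x7f3a2c1d0e80>: Failed to establish a new connection"
    match = pattern.match(raw)
    detail = match.group(1) if match else raw

    print(detail)  # Failed to establish a new connection

If the message does not match the "<object at 0x...>: " prefix format, the helper falls back to str(error), so the worst case is an unchanged message.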
tracdap/rt/_plugins/storage_aws.py
CHANGED
@@ -30,17 +30,24 @@ from pyarrow import fs as afs
 from . import _helpers


-try:
-    # AWS SDK
-    # These dependencies are provided by the optional [aws] feature
-    # For local development, pip install -r requirements_plugins.txt
-    import boto3  # noqa
-    import botocore.response  # noqa
-    import botocore.exceptions as aws_ex  # noqa
-    __aws_available = True
-except ImportError:
-    boto3 = None
-    __aws_available = False
+def _aws_arrow_available():
+    try:
+        # Shipped as part of PyArrow, but may not be available on all platforms
+        return afs.S3FileSystem is not None
+    except ImportError:
+        return False
+
+def _aws_boto3_available():
+    try:
+        # AWS SDK
+        # These dependencies are provided by the optional [aws] feature
+        # For local development, pip install -r requirements_plugins.txt
+        import boto3  # noqa
+        import botocore.response  # noqa
+        import botocore.exceptions as aws_ex  # noqa
+        return True
+    except ImportError:
+        return False


 class AwsStorageProvider(IStorageProvider):
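Both helpers replace the previous import-time flag (__aws_available) with probes that are evaluated on demand, so a missing optional dependency only matters when the corresponding backend is actually used. The general shape of the pattern, as a hedged sketch (_plugin_available is illustrative, not tracdap API):

    import importlib

    def _plugin_available(module_name: str) -> bool:
        # Probe for an optional dependency without failing at import time
        try:
            importlib.import_module(module_name)
            return True
        except ImportError:
            return False

    # Repeated calls are cheap: successful imports are cached in sys.modules
    print(_plugin_available("json"))   # True on any standard install
    print(_plugin_available("boto3"))  # True only if the [aws] extra is installed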
@@ -87,23 +94,22 @@ class AwsStorageProvider(IStorageProvider):
             or self.RUNTIME_FS_DEFAULT

     def has_arrow_native(self) -> bool:
-        if self._runtime_fs == self.RUNTIME_FS_ARROW:
-            return True
-        elif self._runtime_fs == self.RUNTIME_FS_AUTO:
-            return afs.S3FileSystem is not None
-        else:
-            return False
+
+        return _aws_arrow_available()

     def has_file_storage(self) -> bool:
-        if self._runtime_fs == self.RUNTIME_FS_BOTO3:
-            return True
-        elif self._runtime_fs == self.RUNTIME_FS_AUTO:
-            return afs.S3FileSystem is None
-        else:
+
+        # Do not advertise the custom storage implementation if arrow native is available
+        if _aws_arrow_available():
             return False

+        return _aws_boto3_available()
+
     def get_arrow_native(self) -> afs.SubTreeFileSystem:

+        if not _aws_arrow_available():
+            raise ex.EStorage(f"S3 storage setup failed: Plugin for [{self.RUNTIME_FS_ARROW}] is not available")
+
         s3fs_args = self.setup_client_args(self.ARROW_CLIENT_ARGS)
         s3fs = afs.S3FileSystem(**s3fs_args)
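Taken together, has_arrow_native and has_file_storage now encode a clear precedence: the Arrow-native S3 filesystem is used whenever PyArrow ships with S3 support, and the boto3-based IFileStorage is only offered as a fallback. Restated as a small illustrative function (select_s3_backend is not part of the package):

    def select_s3_backend(arrow_available: bool, boto3_available: bool) -> str:
        # Mirrors the precedence implied by has_arrow_native() / has_file_storage()
        if arrow_available:
            return "arrow"   # preferred: afs.S3FileSystem
        if boto3_available:
            return "boto3"   # fallback: custom IFileStorage over the AWS SDK
        raise RuntimeError("S3 storage is not available in this environment")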
@@ -121,6 +127,9 @@ class AwsStorageProvider(IStorageProvider):

     def get_file_storage(self) -> IFileStorage:

+        if not _aws_boto3_available():
+            raise ex.EStorage(f"S3 storage setup failed: Plugin for [{self.RUNTIME_FS_BOTO3}] is not available")
+
         client_args = self.setup_client_args(self.BOTO_CLIENT_ARGS)
         client_args["service_name"] = "s3"
@@ -179,7 +188,7 @@ class AwsStorageProvider(IStorageProvider):
         raise ex.EStartup(message)


-if __aws_available:
+if _aws_arrow_available() or _aws_boto3_available():
     plugins.PluginManager.register_plugin(IStorageProvider, AwsStorageProvider, ["S3"])

@@ -190,195 +199,201 @@ if __aws_available:
 # This is the old implementation that was used before Arrow native was made available
 # It is likely to be removed in a future release

-class S3ObjectStorage(IFileStorage):
-
-    def __init__(self, config: cfg.PluginConfig, client_args: dict):
-
-        self._log = _helpers.logger_for_object(self)
-
-        self._properties = config.properties
-        self._bucket = _helpers.get_plugin_property(self._properties, AwsStorageProvider.BUCKET_PROPERTY)
-        self._prefix = _helpers.get_plugin_property(self._properties, AwsStorageProvider.PREFIX_PROPERTY) or ""
-
-        if self._bucket is None or len(self._bucket.strip()) == 0:
-            message = f"Missing required config property [{AwsStorageProvider.BUCKET_PROPERTY}] for S3 storage"
-            self._log.error(message)
-            raise ex.EConfigParse(message)
-
-        self._client = boto3.client(**client_args)
-
-    def exists(self, storage_path: str) -> bool:
-        self._log.info(f"EXISTS [{storage_path}]")
-
-        try:
-            self._client.head_object(Bucket=self._bucket, Key=self._resolve_path(storage_path))
-            return True
-
-        except aws_ex.ClientError as error:
-            aws_code = error.response['Error']['Code']
-            if aws_code == str(http.HTTPStatus.NOT_FOUND.value):  # noqa
-                return False
-            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
-
-    def size(self, storage_path: str) -> int:
-        self._log.info(f"SIZE [{storage_path}]")
-
-        try:
-            response = self._client.head_object(Bucket=self._bucket, Key=self._resolve_path(storage_path))
-            return response['ContentLength']
-
-        except aws_ex.ClientError as error:
-            raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
-
-    def stat(self, storage_path: str) -> FileStat:
-
-        self._log.info(f"STAT [{storage_path}]")
-
-        name = storage_path.split("/")[-1]
-
-        if self.exists(storage_path):
-            # Handling for directories needs to be changed, as part of refactor onto object storage
-            size = self.size(storage_path)
-            return FileStat(name, FileType.FILE, storage_path, size)
-
-        else:
-            self.ls(storage_path)
-            return FileStat(name, FileType.DIRECTORY, storage_path, 0)
-
-    def ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:
-
-        self._log.info(f"LS [{storage_path}]")
-
-        prefix = self._resolve_path(storage_path) + "/"
-
-        response = self._client.list_objects_v2(
-            Bucket=self._bucket,
-            Prefix=prefix,
-            Delimiter="/")
-
-        keys = []
-
-        if "Contents" not in response:
-            raise ex.EStorageRequest(f"Storage prefix not found: [{storage_path}]")
-
-        for entry in response["Contents"]:
-            raw_key = entry["Key"]
-            if raw_key == prefix:
-                continue
-            key = raw_key.replace(prefix, "")
-            size = entry["Size"]
-            mtime = entry["LastModified "]
-            stat = FileStat(key, FileType.FILE, raw_key, size, mtime=mtime)
-            keys.append(stat)
-
-        if "CommonPrefixes" in response:
-            for raw_prefix in response["CommonPrefixes"]:
-                common_prefix = raw_prefix.replace(prefix, "")
-                stat = FileStat(common_prefix, FileType.DIRECTORY, raw_prefix, 0)
-                keys.append(stat)
-
-        return keys
-
-    def mkdir(self, storage_path: str, recursive: bool = False):
-
-        self._log.info(f"MKDIR [{storage_path}]")
-        pass
-
-    def rm(self, storage_path: str):
-        self._log.info(f"RM [{storage_path}]")
-
-        object_key = self._resolve_path(storage_path)
-        self._client.delete_object(Bucket=self._bucket, Key=object_key)
-
-    def rmdir(self, storage_path: str):
-        self._log.info(f"RMDIR [{storage_path}]")
-
-        raise RuntimeError("RMDIR (recursive) not available for S3 storage")
-
-    def read_bytes(self, storage_path: str) -> bytes:
-
-        body = self._read_impl(storage_path)
-        return body.read()
-
-    def read_byte_stream(self, storage_path: str) -> tp.BinaryIO:
-
-        data = self.read_bytes(storage_path)
-        return io.BytesIO(data)
-
-    def _read_impl(self, storage_path: str):
-
-        object_key = self._resolve_path(storage_path)
-        response = self._client.get_object(Bucket=self._bucket, Key=object_key)
-        return response['Body']
-
-    def write_bytes(self, storage_path: str, data: bytes):
-        self._log.info(f"WRITE BYTES [{storage_path}]")
-
-        object_key = self._resolve_path(storage_path)
-
-        self._client.put_object(
-            Bucket=self._bucket,
-            Key=object_key,
-            Body=data)
-
-    def write_byte_stream(self, storage_path: str) -> tp.BinaryIO:
-
-        self._log.info(f"WRITE BYTE STREAM [{storage_path}]")
-
-        return self._AwsWriteBuf(self, storage_path)
-
-    class _AwsWriteBuf(io.BytesIO):
-
-        def __init__(self, storage, storage_path):
-            super().__init__()
-            self._storage = storage
-            self._storage_path = storage_path
-            self._written = False
-
-        def close(self):
-            if not self._written:
-                self.seek(0)
-                data = self.read()
-                self._storage.write_bytes(self._storage_path, data)
-                self._written = True
-
-    def _resolve_path(self, storage_path: str) -> str:
-
-        if self._prefix is None or self._prefix.strip() == "":
-            return storage_path
-
-        separator = "" if self._prefix.endswith("/") else "/"
-        full_path = self._prefix + separator + storage_path
-
-        return full_path[1:] if full_path.startswith("/") else full_path
+if _aws_boto3_available():
+
+    # These dependencies are provided by the optional [aws] feature
+    # For local development, pip install -r requirements_plugins.txt
+    import boto3  # noqa
+    import botocore.exceptions as aws_ex  # noqa
+
+    class S3ObjectStorage(IFileStorage):
+
+        # This is a quick implementation of IFileStorage on S3 using the boto3 AWS SDK
+
+        def __init__(self, config: cfg.PluginConfig, client_args: dict):
+
+            self._log = _helpers.logger_for_object(self)
+
+            self._properties = config.properties
+            self._bucket = _helpers.get_plugin_property(self._properties, AwsStorageProvider.BUCKET_PROPERTY)
+            self._prefix = _helpers.get_plugin_property(self._properties, AwsStorageProvider.PREFIX_PROPERTY) or ""
+
+            if self._bucket is None or len(self._bucket.strip()) == 0:
+                message = f"Missing required config property [{AwsStorageProvider.BUCKET_PROPERTY}] for S3 storage"
+                self._log.error(message)
+                raise ex.EConfigParse(message)
+
+            self._client = boto3.client(**client_args)
+
+        def exists(self, storage_path: str) -> bool:
+
+            try:
+                self._log.info(f"EXISTS [{storage_path}]")
+
+                object_key = self._resolve_path(storage_path)
+                self._client.head_object(Bucket=self._bucket, Key=object_key)
+                return True
+
+            except aws_ex.ClientError as error:
+                aws_code = error.response['Error']['Code']
+                if aws_code == str(http.HTTPStatus.NOT_FOUND.value):  # noqa
+                    return False
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+
+        def size(self, storage_path: str) -> int:
+
+            try:
+                self._log.info(f"SIZE [{storage_path}]")
+
+                object_key = self._resolve_path(storage_path)
+                response = self._client.head_object(Bucket=self._bucket, Key=object_key)
+                return response['ContentLength']
+
+            except aws_ex.ClientError as error:
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+
+        def stat(self, storage_path: str) -> FileStat:
+
+            self._log.info(f"STAT [{storage_path}]")
+
+            name = storage_path.split("/")[-1]
+
+            if self.exists(storage_path):
+
+                # Only OBJECTS can support stat atm
+                # Handling for directories needs to be changed, as part of refactor onto object storage
+                size = self.size(storage_path)
+                return FileStat(name, FileType.FILE, storage_path, size)
+
+            else:
+
+                self.ls(storage_path)
+                return FileStat(name, FileType.DIRECTORY, storage_path, 0)
+
+        def ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:
+
+            self._log.info(f"LS [{storage_path}]")
+
+            prefix = self._resolve_path(storage_path) + "/"
+
+            response = self._client.list_objects_v2(
+                Bucket=self._bucket,
+                Prefix=prefix,
+                Delimiter="/")
+
+            keys = []
+
+            if "Contents" not in response and "CommonPrefixes" not in response:
+                raise ex.EStorageRequest(f"Storage prefix not found: [{storage_path}]")
+
+            if "Contents" in response:
+                for entry in response["Contents"]:
+                    raw_key = entry["Key"]
+                    if raw_key == prefix:
+                        continue
+                    key = raw_key.replace(prefix, "")
+                    size = entry["Size"]
+                    mtime = entry["LastModified "]
+                    stat = FileStat(key, FileType.FILE, raw_key, size, mtime=mtime)
+                    keys.append(stat)
+
+            if "CommonPrefixes" in response:
+                for raw_prefix in response["CommonPrefixes"]:
+                    common_prefix = raw_prefix.replace(prefix, "")
+                    stat = FileStat(common_prefix, FileType.DIRECTORY, raw_prefix, 0)
+                    keys.append(stat)
+
+            return keys
+
+        def mkdir(self, storage_path: str, recursive: bool = False):
+
+            self._log.info(f"MKDIR [{storage_path}]")
+
+            # No-op in object storage
+            pass
+
+        def rm(self, storage_path: str):
+
+            try:
+                self._log.info(f"RM [{storage_path}]")
+
+                object_key = self._resolve_path(storage_path)
+                self._client.delete_object(Bucket=self._bucket, Key=object_key)
+
+            except aws_ex.ClientError as error:
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+
+        def rmdir(self, storage_path: str):
+
+            raise RuntimeError("RMDIR (recursive) not available for S3 storage")
+
+        def read_bytes(self, storage_path: str) -> bytes:
+
+            self._log.info(f"READ BYTES [{storage_path}]")
+
+            body = self._read_impl(storage_path)
+            return body.read()
+
+        def read_byte_stream(self, storage_path: str) -> tp.BinaryIO:
+
+            self._log.info(f"READ BYTE STREAM [{storage_path}]")
+
+            data = self.read_bytes(storage_path)
+            return io.BytesIO(data)
+
+        def _read_impl(self, storage_path: str):
+
+            try:
+
+                object_key = self._resolve_path(storage_path)
+                response = self._client.get_object(Bucket=self._bucket, Key=object_key)
+                return response['Body']
+
+            except aws_ex.ClientError as error:
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+
+        def write_bytes(self, storage_path: str, data: bytes):
+
+            try:
+                self._log.info(f"WRITE BYTES [{storage_path}]")
+
+                object_key = self._resolve_path(storage_path)
+
+                self._client.put_object(
+                    Bucket=self._bucket,
+                    Key=object_key,
+                    Body=data)
+
+            except aws_ex.ClientError as error:
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+
+        def write_byte_stream(self, storage_path: str) -> tp.BinaryIO:
+
+            self._log.info(f"WRITE BYTE STREAM [{storage_path}]")
+
+            return self._AwsWriteBuf(self, storage_path)
+
+        class _AwsWriteBuf(io.BytesIO):
+
+            def __init__(self, storage, storage_path):
+                super().__init__()
+                self._storage = storage
+                self._storage_path = storage_path
+                self._written = False
+
+            def close(self):
+                if not self._written:
+                    self.seek(0)
+                    data = self.read()
+                    self._storage.write_bytes(self._storage_path, data)
+                    self._written = True
+
+        def _resolve_path(self, storage_path: str) -> str:
+
+            if self._prefix is None or self._prefix.strip() == "":
+                return storage_path
+
+            separator = "" if self._prefix.endswith("/") else "/"
+            full_path = self._prefix + separator + storage_path
+
+            return full_path[1:] if full_path.startswith("/") else full_path
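One detail worth calling out in the relocated class is how _resolve_path maps storage paths onto S3 object keys under the configured prefix. A standalone copy of the logic, runnable as a quick check (resolve_path is an illustrative rename):

    def resolve_path(prefix: str, storage_path: str) -> str:
        # Same logic as S3ObjectStorage._resolve_path above
        if prefix is None or prefix.strip() == "":
            return storage_path

        separator = "" if prefix.endswith("/") else "/"
        full_path = prefix + separator + storage_path

        # S3 object keys never start with a leading slash
        return full_path[1:] if full_path.startswith("/") else full_path

    assert resolve_path("", "data/file.csv") == "data/file.csv"
    assert resolve_path("tenant-a", "data/file.csv") == "tenant-a/data/file.csv"
    assert resolve_path("/tenant-a/", "data/file.csv") == "tenant-a/data/file.csv"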
tracdap/rt/_plugins/storage_azure.py
CHANGED
@@ -23,20 +23,21 @@ from tracdap.rt.ext.storage import *

 import pyarrow.fs as afs

-try:
-    # These dependencies are provided by the optional [azure] feature
-    # For local development, pip install -r requirements_plugins.txt
-    import azure.storage.blob as az_blob  # noqa
-    import adlfs  # noqa
-    __azure_available = True
-except ImportError:
-    adlfs = None
-    __azure_available = False
-
 # Set of common helpers across the core plugins (do not reference rt._impl)
 from . import _helpers


+def _azure_fsspec_available():
+    try:
+        # These dependencies are provided by the optional [azure] feature
+        # For local development, pip install -r requirements_plugins.txt
+        import azure.storage.blob as az_blob  # noqa
+        import adlfs  # noqa
+        return True
+    except ImportError:
+        return False
+
+
 class AzureBlobStorageProvider(IStorageProvider):

     # This client depends on the Azure fsspec implementation, since there is no native implementation from Arrow
@@ -100,6 +101,11 @@ class AzureBlobStorageProvider(IStorageProvider):

     def create_fsspec(self) -> afs.FileSystem:

+        if not _azure_fsspec_available():
+            raise ex.EStorage(f"BLOB storage setup failed: Plugin for [{self.RUNTIME_FS_FSSPEC}] is not available")
+
+        import adlfs  # noqa
+
         azure_fsspec_args = self.setup_client_args()
         azure_fsspec = adlfs.AzureBlobFileSystem(**azure_fsspec_args)
@@ -152,5 +158,5 @@ class AzureBlobStorageProvider(IStorageProvider):


 # Only register the plugin if the [azure] feature is available
-if __azure_available:
+if _azure_fsspec_available():
     plugins.PluginManager.register_plugin(IStorageProvider, AzureBlobStorageProvider, ["BLOB"])