tracdap-runtime 0.9.0b1__py3-none-any.whl → 0.9.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_impl/core/config_parser.py +29 -3
- tracdap/rt/_impl/core/data.py +93 -51
- tracdap/rt/_impl/core/repos.py +15 -13
- tracdap/rt/_impl/core/storage.py +17 -12
- tracdap/rt/_impl/core/struct.py +254 -60
- tracdap/rt/_impl/core/util.py +94 -23
- tracdap/rt/_impl/exec/context.py +35 -8
- tracdap/rt/_impl/exec/dev_mode.py +60 -40
- tracdap/rt/_impl/exec/engine.py +44 -50
- tracdap/rt/_impl/exec/functions.py +12 -8
- tracdap/rt/_impl/exec/graph.py +3 -3
- tracdap/rt/_impl/exec/graph_builder.py +22 -5
- tracdap/rt/_impl/grpc/codec.py +4 -11
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +36 -34
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +64 -64
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +22 -18
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
- tracdap/rt/_impl/runtime.py +2 -16
- tracdap/rt/_impl/static_api.py +5 -6
- tracdap/rt/_plugins/format_csv.py +2 -2
- tracdap/rt/_plugins/storage_aws.py +165 -150
- tracdap/rt/_plugins/storage_azure.py +17 -11
- tracdap/rt/_plugins/storage_gcp.py +35 -18
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/model_api.py +45 -0
- tracdap/rt/config/__init__.py +8 -10
- tracdap/rt/config/common.py +0 -16
- tracdap/rt/config/job.py +4 -0
- tracdap/rt/config/platform.py +9 -32
- tracdap/rt/config/runtime.py +4 -11
- tracdap/rt/config/tenant.py +28 -0
- tracdap/rt/launch/cli.py +0 -8
- tracdap/rt/launch/launch.py +1 -3
- tracdap/rt/metadata/__init__.py +18 -19
- tracdap/rt/metadata/data.py +19 -31
- tracdap/rt/metadata/job.py +1 -1
- tracdap/rt/metadata/type.py +9 -5
- {tracdap_runtime-0.9.0b1.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +3 -3
- {tracdap_runtime-0.9.0b1.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +43 -42
- {tracdap_runtime-0.9.0b1.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.9.0b1.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
- {tracdap_runtime-0.9.0b1.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
tracdap/rt/_plugins/format_csv.py

@@ -385,9 +385,9 @@ class CsvStorageFormat(IDataFormat):
 
         if python_type == bool:
             if isinstance(raw_value, str):
-                if raw_value.lower() in self.__TRUE_VALUES:
+                if raw_value.strip().lower() in self.__TRUE_VALUES:
                     return True
-                if raw_value.lower() in self.__FALSE_VALUES:
+                if raw_value.strip().lower() in self.__FALSE_VALUES:
                     return False
             if isinstance(raw_value, int) or isinstance(raw_value, float):
                 if raw_value == 1:
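
The only functional change here is that boolean CSV values are now trimmed before comparison, so cells like " True " or "FALSE " parse correctly. A minimal sketch of the same idea, using assumed value sets rather than the class's private __TRUE_VALUES / __FALSE_VALUES:

    # Illustrative sketch only, not the package's implementation
    _TRUE_VALUES = {"true", "t", "yes", "y", "1"}
    _FALSE_VALUES = {"false", "f", "no", "n", "0"}

    def parse_csv_bool(raw_value: str) -> bool:
        value = raw_value.strip().lower()      # " True " -> "true"
        if value in _TRUE_VALUES:
            return True
        if value in _FALSE_VALUES:
            return False
        raise ValueError(f"Not a boolean value: {raw_value!r}")

    assert parse_csv_bool(" True ") is True    # would raise without .strip()
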
tracdap/rt/_plugins/storage_aws.py

@@ -30,17 +30,24 @@ from pyarrow import fs as afs
 from . import _helpers
 
 
-
-
-
-
-
-
-
-
-
-
-
+def _aws_arrow_available():
+    try:
+        # Shipped as part of PyArrow, but may not be available on all platforms
+        return afs.S3FileSystem is not None
+    except ImportError:
+        return False
+
+def _aws_boto3_available():
+    try:
+        # AWS SDK
+        # These dependencies are provided by the optional [aws] feature
+        # For local development, pip install -r requirements_plugins.txt
+        import boto3 # noqa
+        import botocore.response # noqa
+        import botocore.exceptions as aws_ex # noqa
+        return True
+    except ImportError:
+        return False
 
 
 class AwsStorageProvider(IStorageProvider):
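
The module-level import guard is replaced by two helper functions, so availability of the optional dependencies is probed with a cheap call at the point of use instead of being fixed at import time. The general shape of the pattern, sketched with a stand-in registration function (register_plugin below is illustrative, not the TRAC plugin API):

    # Illustrative sketch of the deferred availability check
    def _sdk_available() -> bool:
        try:
            import boto3  # noqa - optional dependency, may be missing
            return True
        except ImportError:
            return False

    def register_plugin(protocol: str) -> None:
        print(f"registered storage provider for [{protocol}]")

    # Registration only happens when at least one working backend exists
    if _sdk_available():
        register_plugin("S3")
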
@@ -87,23 +94,22 @@ class AwsStorageProvider(IStorageProvider):
             or self.RUNTIME_FS_DEFAULT
 
     def has_arrow_native(self) -> bool:
-
-
-        elif self._runtime_fs == self.RUNTIME_FS_AUTO:
-            return afs.S3FileSystem is not None
-        else:
-            return False
+
+        return _aws_arrow_available()
 
     def has_file_storage(self) -> bool:
-
-
-
-            return afs.S3FileSystem is None
-        else:
+
+        # Do not advertise the custom storage implementation if arrow native is available
+        if _aws_arrow_available():
             return False
 
+        return _aws_boto3_available()
+
     def get_arrow_native(self) -> afs.SubTreeFileSystem:
 
+        if not _aws_arrow_available():
+            raise ex.EStorage(f"S3 storage setup failed: Plugin for [{self.RUNTIME_FS_ARROW}] is not available")
+
         s3fs_args = self.setup_client_args(self.ARROW_CLIENT_ARGS)
         s3fs = afs.S3FileSystem(**s3fs_args)
 
@@ -121,6 +127,9 @@ class AwsStorageProvider(IStorageProvider):
 
     def get_file_storage(self) -> IFileStorage:
 
+        if not _aws_boto3_available():
+            raise ex.EStorage(f"S3 storage setup failed: Plugin for [{self.RUNTIME_FS_BOTO3}] is not available")
+
         client_args = self.setup_client_args(self.BOTO_CLIENT_ARGS)
         client_args["service_name"] = "s3"
 
@@ -179,7 +188,7 @@ class AwsStorageProvider(IStorageProvider):
         raise ex.EStartup(message)
 
 
-if __aws_available:
+if _aws_arrow_available() or _aws_boto3_available():
     plugins.PluginManager.register_plugin(IStorageProvider, AwsStorageProvider, ["S3"])
 
 
@@ -190,195 +199,201 @@ if __aws_available:
 # This is the old implementation that was used before Arrow native was made available
 # It is likely to be removed in a future release
 
+if _aws_boto3_available():
 
-
+    # These dependencies are provided by the optional [aws] feature
+    # For local development, pip install -r requirements_plugins.txt
+    import boto3 # noqa
+    import botocore.exceptions as aws_ex # noqa
 
-
+    class S3ObjectStorage(IFileStorage):
 
-
+        # This is a quick implementation of IFileStorage on S3 using the boto3 AWS SDK
 
-
+        def __init__(self, config: cfg.PluginConfig, client_args: dict):
 
-
-            self._bucket = _helpers.get_plugin_property(self._properties, AwsStorageProvider.BUCKET_PROPERTY)
-            self._prefix = _helpers.get_plugin_property(self._properties, AwsStorageProvider.PREFIX_PROPERTY) or ""
+            self._log = _helpers.logger_for_object(self)
 
-
-
-
-                raise ex.EConfigParse(message)
+            self._properties = config.properties
+            self._bucket = _helpers.get_plugin_property(self._properties, AwsStorageProvider.BUCKET_PROPERTY)
+            self._prefix = _helpers.get_plugin_property(self._properties, AwsStorageProvider.PREFIX_PROPERTY) or ""
 
-
+            if self._bucket is None or len(self._bucket.strip()) == 0:
+                message = f"Missing required config property [{AwsStorageProvider.BUCKET_PROPERTY}] for S3 storage"
+                self._log.error(message)
+                raise ex.EConfigParse(message)
 
-
+            self._client = boto3.client(**client_args)
 
-
-            self._log.info(f"EXISTS [{storage_path}]")
+        def exists(self, storage_path: str) -> bool:
 
-
-
-                return True
+            try:
+                self._log.info(f"EXISTS [{storage_path}]")
 
-
-
-
-                return False
-                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
+                object_key = self._resolve_path(storage_path)
+                self._client.head_object(Bucket=self._bucket, Key=object_key)
+                return True
 
-
+            except aws_ex.ClientError as error:
+                aws_code = error.response['Error']['Code']
+                if aws_code == str(http.HTTPStatus.NOT_FOUND.value): # noqa
+                    return False
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
-
-            self._log.info(f"SIZE [{storage_path}]")
+        def size(self, storage_path: str) -> int:
 
-
-
-                return response['ContentLength']
+            try:
+                self._log.info(f"SIZE [{storage_path}]")
 
-
-
+                object_key = self._resolve_path(storage_path)
+                response = self._client.head_object(Bucket=self._bucket, Key=object_key)
+                return response['ContentLength']
 
-
+            except aws_ex.ClientError as error:
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
-
+        def stat(self, storage_path: str) -> FileStat:
 
-
+            self._log.info(f"STAT [{storage_path}]")
 
-
+            name = storage_path.split("/")[-1]
 
-
-            # Handling for directories needs to be changed, as part of refactor onto object storage
-            size = self.size(storage_path)
-            return FileStat(name, FileType.FILE, storage_path, size)
+            if self.exists(storage_path):
 
-
+                # Only OBJECTS can support stat atm
+                # Handling for directories needs to be changed, as part of refactor onto object storage
+                size = self.size(storage_path)
+                return FileStat(name, FileType.FILE, storage_path, size)
 
-
-            return FileStat(name, FileType.DIRECTORY, storage_path, 0)
+            else:
 
-
+                self.ls(storage_path)
+                return FileStat(name, FileType.DIRECTORY, storage_path, 0)
 
-
+        def ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:
 
-
+            self._log.info(f"LS [{storage_path}]")
 
-
-                Bucket=self._bucket,
-                Prefix=prefix,
-                Delimiter="/")
+            prefix = self._resolve_path(storage_path) + "/"
 
-
+            response = self._client.list_objects_v2(
+                Bucket=self._bucket,
+                Prefix=prefix,
+                Delimiter="/")
 
-
-                raise ex.EStorageRequest(f"Storage prefix not found: [{storage_path}]")
+            keys = []
 
-
-
-                raw_key = entry["Key"]
-                if raw_key == prefix:
-                    continue
-                key = raw_key.replace(prefix, "")
-                size = entry["Size"]
-                mtime = entry["LastModified "]
-                stat = FileStat(key, FileType.FILE, raw_key, size, mtime=mtime)
-                keys.append(stat)
+            if "Contents" not in response and "CommonPrefixes" not in response:
+                raise ex.EStorageRequest(f"Storage prefix not found: [{storage_path}]")
 
-
-
-
-
-
+            if "Contents" in response:
+                for entry in response["Contents"]:
+                    raw_key = entry["Key"]
+                    if raw_key == prefix:
+                        continue
+                    key = raw_key.replace(prefix, "")
+                    size = entry["Size"]
+                    mtime = entry["LastModified "]
+                    stat = FileStat(key, FileType.FILE, raw_key, size, mtime=mtime)
+                    keys.append(stat)
 
-
+            if "CommonPrefixes" in response:
+                for raw_prefix in response["CommonPrefixes"]:
+                    common_prefix = raw_prefix.replace(prefix, "")
+                    stat = FileStat(common_prefix, FileType.DIRECTORY, raw_prefix, 0)
+                    keys.append(stat)
 
-
+            return keys
 
-
+        def mkdir(self, storage_path: str, recursive: bool = False):
 
-
-            pass
+            self._log.info(f"MKDIR [{storage_path}]")
 
-
+            # No-op in object storage
+            pass
 
-
-            self._log.info(f"RM [{storage_path}]")
+        def rm(self, storage_path: str):
 
-
-
+            try:
+                self._log.info(f"RM [{storage_path}]")
 
-
-
+                object_key = self._resolve_path(storage_path)
+                self._client.delete_object(Bucket=self._bucket, Key=object_key)
 
-
+            except aws_ex.ClientError as error:
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
-
+        def rmdir(self, storage_path: str):
 
-
+            raise RuntimeError("RMDIR (recursive) not available for S3 storage")
 
-
+        def read_bytes(self, storage_path: str) -> bytes:
 
-
-            return body.read()
+            self._log.info(f"READ BYTES [{storage_path}]")
 
-
+            body = self._read_impl(storage_path)
+            return body.read()
 
-
+        def read_byte_stream(self, storage_path: str) -> tp.BinaryIO:
 
-
-            return io.BytesIO(data)
+            self._log.info(f"READ BYTE STREAM [{storage_path}]")
 
-
+            data = self.read_bytes(storage_path)
+            return io.BytesIO(data)
 
-
+        def _read_impl(self, storage_path: str):
 
-
-            response = self._client.get_object(Bucket=self._bucket, Key=object_key)
-            return response['Body']
+            try:
 
-
-
+                object_key = self._resolve_path(storage_path)
+                response = self._client.get_object(Bucket=self._bucket, Key=object_key)
+                return response['Body']
 
-
+            except aws_ex.ClientError as error:
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
-
-            self._log.info(f"WRITE BYTES [{storage_path}]")
+        def write_bytes(self, storage_path: str, data: bytes):
 
-
+            try:
+                self._log.info(f"WRITE BYTES [{storage_path}]")
 
-
-
-
-
+                object_key = self._resolve_path(storage_path)
+
+                self._client.put_object(
+                    Bucket=self._bucket,
+                    Key=object_key,
+                    Body=data)
 
-
-
+            except aws_ex.ClientError as error:
+                raise ex.EStorageRequest(f"Storage error: {str(error)}") from error
 
-
+        def write_byte_stream(self, storage_path: str) -> tp.BinaryIO:
 
-
+            self._log.info(f"WRITE BYTE STREAM [{storage_path}]")
 
-
+            return self._AwsWriteBuf(self, storage_path)
 
-
+        class _AwsWriteBuf(io.BytesIO):
 
-
-
-
-
-
+            def __init__(self, storage, storage_path):
+                super().__init__()
+                self._storage = storage
+                self._storage_path = storage_path
+                self._written = False
 
-
-
-
-
-
-
+            def close(self):
+                if not self._written:
+                    self.seek(0)
+                    data = self.read()
+                    self._storage.write_bytes(self._storage_path, data)
+                    self._written = True
 
-
+        def _resolve_path(self, storage_path: str) -> str:
 
-
-
+            if self._prefix is None or self._prefix.strip() == "":
+                return storage_path
 
-
-
+            separator = "" if self._prefix.endswith("/") else "/"
+            full_path = self._prefix + separator + storage_path
 
-
+            return full_path[1:] if full_path.startswith("/") else full_path
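
S3 objects cannot be appended to in place, so write_byte_stream hands back _AwsWriteBuf, which accumulates writes in memory and performs a single upload when the stream is closed. A self-contained sketch of that buffer-then-upload pattern (the class and upload callback below are illustrative, not the package's code):

    import io

    class BufferedObjectWriter(io.BytesIO):

        def __init__(self, upload_fn, key):
            super().__init__()
            self._upload_fn = upload_fn      # callable(key, data) that stores the object
            self._key = key
            self._written = False

        def close(self):
            if not self._written:
                self.seek(0)                 # rewind the buffer before reading it back
                self._upload_fn(self._key, self.read())
                self._written = True
            super().close()

    # Leaving the with-block closes the stream, which triggers one upload
    uploads = {}
    with BufferedObjectWriter(lambda key, data: uploads.update({key: data}), "demo.txt") as out:
        out.write(b"hello, ")
        out.write(b"object storage")
    assert uploads["demo.txt"] == b"hello, object storage"
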
tracdap/rt/_plugins/storage_azure.py

@@ -23,20 +23,21 @@ from tracdap.rt.ext.storage import *
 
 import pyarrow.fs as afs
 
-try:
-    # These dependencies are provided by the optional [azure] feature
-    # For local development, pip install -r requirements_plugins.txt
-    import azure.storage.blob as az_blob # noqa
-    import adlfs # noqa
-    __azure_available = True
-except ImportError:
-    adlfs = None
-    __azure_available = False
-
 # Set of common helpers across the core plugins (do not reference rt._impl)
 from . import _helpers
 
 
+def _azure_fsspec_available():
+    try:
+        # These dependencies are provided by the optional [azure] feature
+        # For local development, pip install -r requirements_plugins.txt
+        import azure.storage.blob as az_blob # noqa
+        import adlfs # noqa
+        return True
+    except ImportError:
+        return False
+
+
 class AzureBlobStorageProvider(IStorageProvider):
 
     # This client depends on the Azure fsspec implementation, since there is no native implementation from Arrow
@@ -100,6 +101,11 @@ class AzureBlobStorageProvider(IStorageProvider):
 
     def create_fsspec(self) -> afs.FileSystem:
 
+        if not _azure_fsspec_available():
+            raise ex.EStorage(f"BLOB storage setup failed: Plugin for [{self.RUNTIME_FS_FSSPEC}] is not available")
+
+        import adlfs # noqa
+
         azure_fsspec_args = self.setup_client_args()
         azure_fsspec = adlfs.AzureBlobFileSystem(**azure_fsspec_args)
 
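
Because adlfs is no longer imported when the module loads, create_fsspec re-imports it locally once the availability check has passed. The function-local import pattern for optional dependencies looks like this in general (the helper below is illustrative, not the package's API):

    # Illustrative sketch: import the optional dependency only at the point of use
    def load_azure_filesystem_class():
        try:
            import adlfs  # noqa - provided by the optional [azure] feature
        except ImportError as error:
            raise RuntimeError("Azure BLOB storage requested but adlfs is not installed") from error
        return adlfs.AzureBlobFileSystem  # caller constructs it with real credentials
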
@@ -152,5 +158,5 @@ class AzureBlobStorageProvider(IStorageProvider):
 
 
 # Only register the plugin if the [azure] feature is available
-if __azure_available:
+if _azure_fsspec_available():
     plugins.PluginManager.register_plugin(IStorageProvider, AzureBlobStorageProvider, ["BLOB"])
tracdap/rt/_plugins/storage_gcp.py

@@ -28,20 +28,27 @@ from pyarrow import fs as pa_fs
 from . import _helpers
 
 
-
-
-
-
-
-
-
-
-
-
+def _gcp_arrow_available():
+    try:
+        # Shipped as part of PyArrow, but may not be available on all platforms
+        return pa_fs.GcsFileSystem is not None
+    except ImportError:
+        return False
+
+def _gcp_fsspec_available():
+    try:
+        # These dependencies are provided by the optional [gcp] feature
+        # For local development, pip install -r requirements_plugins.txt
+        import google.cloud.storage as gcs # noqa
+        import gcsfs # noqa
+        return True
+    except ImportError:
+        return False
 
 
 class GcpStorageProvider(IStorageProvider):
 
+    PROJECT_PROPERTY = "project"
     BUCKET_PROPERTY = "bucket"
     PREFIX_PROPERTY = "prefix"
     REGION_PROPERTY = "region"
@@ -62,20 +69,17 @@ class GcpStorageProvider(IStorageProvider):
     RUNTIME_FS_DEFAULT = RUNTIME_FS_AUTO
 
     ARROW_CLIENT_ARGS = {
+        PROJECT_PROPERTY: "project_id",
         REGION_PROPERTY: "default_bucket_location",
-        ENDPOINT_PROPERTY: "endpoint_override"
+        ENDPOINT_PROPERTY: "endpoint_override",
     }
 
     FSSPEC_CLIENT_ARGS = {
+        PROJECT_PROPERTY: "project",
         REGION_PROPERTY: "default_location",
         ENDPOINT_PROPERTY: "endpoint_url"
     }
 
-    try:
-        __arrow_available = pa_fs.GcsFileSystem is not None
-    except ImportError:
-        __arrow_available = False
-
     def __init__(self, properties: tp.Dict[str, str]):
 
         self._log = _helpers.logger_for_object(self)
@@ -91,7 +95,7 @@ class GcpStorageProvider(IStorageProvider):
     def get_arrow_native(self) -> pa_fs.SubTreeFileSystem:
 
         if self._runtime_fs == self.RUNTIME_FS_AUTO:
-            gcs_fs = self.create_arrow() if
+            gcs_fs = self.create_arrow() if _gcp_arrow_available() else self.create_fsspec()
         elif self._runtime_fs == self.RUNTIME_FS_ARROW:
             gcs_fs = self.create_arrow()
         elif self._runtime_fs == self.RUNTIME_FS_FSSPEC:
@@ -115,12 +119,20 @@ class GcpStorageProvider(IStorageProvider):
 
     def create_arrow(self) -> pa_fs.FileSystem:
 
+        if not _gcp_arrow_available():
+            raise ex.EStorage(f"GCS storage setup failed: Plugin for [{self.RUNTIME_FS_ARROW}] is not available")
+
         gcs_arrow_args = self.setup_client_args(self.ARROW_CLIENT_ARGS)
 
         return pa_fs.GcsFileSystem(**gcs_arrow_args)
 
     def create_fsspec(self) -> pa_fs.FileSystem:
 
+        if not _gcp_fsspec_available():
+            raise ex.EStorage(f"GCS storage setup failed: Plugin for [{self.RUNTIME_FS_FSSPEC}] is not available")
+
+        import gcsfs # noqa
+
         gcs_fsspec_args = self.setup_client_args(self.FSSPEC_CLIENT_ARGS)
         gcs_fsspec = gcsfs.GCSFileSystem(**gcs_fsspec_args)
 
@@ -130,9 +142,14 @@ class GcpStorageProvider(IStorageProvider):
 
         client_args = dict()
 
+        project = _helpers.get_plugin_property(self._properties, self.PROJECT_PROPERTY)
         region = _helpers.get_plugin_property(self._properties, self.REGION_PROPERTY)
         endpoint = _helpers.get_plugin_property(self._properties, self.ENDPOINT_PROPERTY)
 
+        if project is not None:
+            project_key = arg_mapping[self.PROJECT_PROPERTY]
+            client_args[project_key] = project
+
         if region is not None:
             region_key = arg_mapping[self.REGION_PROPERTY]
             client_args[region_key] = region
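
setup_client_args translates the same storage properties into whichever keyword arguments the selected client library expects, using the mapping dicts shown earlier (ARROW_CLIENT_ARGS or FSSPEC_CLIENT_ARGS). A simplified, self-contained sketch of that translation, with property and argument names taken from the diff but a hypothetical helper function:

    # Illustrative sketch of the property-to-argument translation
    ARROW_CLIENT_ARGS = {
        "project": "project_id",
        "region": "default_bucket_location",
        "endpoint": "endpoint_override",
    }

    def build_client_args(properties: dict, arg_mapping: dict) -> dict:
        client_args = {}
        for prop_name, arg_name in arg_mapping.items():
            value = properties.get(prop_name)
            if value is not None:            # only pass through properties that were configured
                client_args[arg_name] = value
        return client_args

    # {"project": "my-project", "region": "europe-west2"}
    #   -> {"project_id": "my-project", "default_bucket_location": "europe-west2"}
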
@@ -180,5 +197,5 @@ class GcpStorageProvider(IStorageProvider):
         raise ex.EStartup(message)
 
 
-if
+if _gcp_arrow_available() or _gcp_fsspec_available():
     plugins.PluginManager.register_plugin(IStorageProvider, GcpStorageProvider, ["GCS"])
tracdap/rt/_version.py  CHANGED