tracdap-runtime 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_exec/context.py +556 -36
- tracdap/rt/_exec/dev_mode.py +320 -198
- tracdap/rt/_exec/engine.py +331 -62
- tracdap/rt/_exec/functions.py +151 -22
- tracdap/rt/_exec/graph.py +47 -13
- tracdap/rt/_exec/graph_builder.py +383 -175
- tracdap/rt/_exec/runtime.py +7 -5
- tracdap/rt/_impl/config_parser.py +11 -4
- tracdap/rt/_impl/data.py +329 -152
- tracdap/rt/_impl/ext/__init__.py +13 -0
- tracdap/rt/_impl/ext/sql.py +116 -0
- tracdap/rt/_impl/ext/storage.py +57 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +82 -30
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +155 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +12 -10
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +29 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +16 -0
- tracdap/rt/_impl/models.py +8 -0
- tracdap/rt/_impl/static_api.py +29 -0
- tracdap/rt/_impl/storage.py +39 -27
- tracdap/rt/_impl/util.py +10 -0
- tracdap/rt/_impl/validation.py +140 -18
- tracdap/rt/_plugins/repo_git.py +1 -1
- tracdap/rt/_plugins/storage_sql.py +417 -0
- tracdap/rt/_plugins/storage_sql_dialects.py +117 -0
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/experimental.py +267 -0
- tracdap/rt/api/hook.py +14 -0
- tracdap/rt/api/model_api.py +48 -6
- tracdap/rt/config/__init__.py +2 -2
- tracdap/rt/config/common.py +6 -0
- tracdap/rt/metadata/__init__.py +29 -20
- tracdap/rt/metadata/job.py +99 -0
- tracdap/rt/metadata/model.py +18 -0
- tracdap/rt/metadata/resource.py +24 -0
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/METADATA +5 -1
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/RECORD +41 -32
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/top_level.txt +0 -0
tracdap/rt/_exec/context.py
CHANGED
```diff
@@ -19,13 +19,13 @@ import typing as tp
 import re
 import traceback
 
-import pandas as pd
-
 import tracdap.rt.api as _api
+import tracdap.rt.api.experimental as _eapi
 import tracdap.rt.metadata as _meta
 import tracdap.rt.exceptions as _ex
 import tracdap.rt._impl.type_system as _types  # noqa
 import tracdap.rt._impl.data as _data  # noqa
+import tracdap.rt._impl.storage as _storage  # noqa
 import tracdap.rt._impl.util as _util  # noqa
 import tracdap.rt._impl.validation as _val  # noqa
 
```
```diff
@@ -55,12 +55,11 @@ class TracContextImpl(_api.TracContext):
     Output views will contain schemas but no data.
     """
 
-    __DEFAULT_TEMPORAL_OBJECTS = False
-
     def __init__(self,
                  model_def: _meta.ModelDefinition,
                  model_class: _api.TracModel.__class__,
                  local_ctx: tp.Dict[str, tp.Any],
+                 dynamic_outputs: tp.List[str] = None,
                  checkout_directory: pathlib.Path = None):
 
         self.__ctx_log = _util.logger_for_object(self)
```
```diff
@@ -68,12 +67,14 @@ class TracContextImpl(_api.TracContext):
 
         self.__model_def = model_def
         self.__model_class = model_class
-        self.__local_ctx = local_ctx
+        self.__local_ctx = local_ctx if local_ctx is not None else {}
+        self.__dynamic_outputs = dynamic_outputs if dynamic_outputs is not None else []
 
         self.__val = TracContextValidator(
             self.__ctx_log,
             self.__model_def,
             self.__local_ctx,
+            self.__dynamic_outputs,
             checkout_directory)
 
     def get_parameter(self, parameter_name: str) -> tp.Any:
```
```diff
@@ -131,18 +132,22 @@
         else:
             return copy.deepcopy(data_view.trac_schema)
 
-    def …
+    def get_table(self, dataset_name: str, framework: _eapi.DataFramework[_eapi.DATA_API], **framework_args) -> _eapi.DATA_API:
 
-        _val.validate_signature(self.…
+        _val.validate_signature(self.get_table, dataset_name, framework)
+        _val.require_package(framework.protocol_name, framework.api_type)
 
         self.__val.check_dataset_valid_identifier(dataset_name)
         self.__val.check_dataset_defined_in_model(dataset_name)
         self.__val.check_dataset_available_in_context(dataset_name)
+        self.__val.check_data_framework_args(framework, framework_args)
 
         static_schema = self.__get_static_schema(self.__model_def, dataset_name)
         data_view = self.__local_ctx.get(dataset_name)
         part_key = _data.DataPartKey.for_root()
 
+        converter = _data.DataConverter.for_framework(framework, **framework_args)
+
         self.__val.check_context_object_type(dataset_name, data_view, _data.DataView)
         self.__val.check_dataset_schema_defined(dataset_name, data_view)
         self.__val.check_dataset_part_present(dataset_name, data_view, part_key)
```
```diff
@@ -155,10 +160,18 @@
         else:
             schema = data_view.arrow_schema
 
-        …
-        …
+        table = _data.DataMapping.view_to_arrow(data_view, part_key)
+
+        # Data conformance is applied automatically inside the converter, if schema != None
+        return converter.from_internal(table, schema)
 
-        …
+    def get_pandas_table(self, dataset_name: str, use_temporal_objects: tp.Optional[bool] = None) -> "_data.pandas.DataFrame":
+
+        return self.get_table(dataset_name, _eapi.PANDAS, use_temporal_objects=use_temporal_objects)
+
+    def get_polars_table(self, dataset_name: str) -> "_data.polars.DataFrame":
+
+        return self.get_table(dataset_name, _eapi.POLARS)
 
     def put_schema(self, dataset_name: str, schema: _meta.SchemaDefinition):
 
```
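This hunk replaces the pandas-only accessor with a framework-parameterized `get_table`, keeping `get_pandas_table` as a thin wrapper and adding `get_polars_table`. A minimal usage sketch, assuming the experimental module re-exports the core model API as in other TRAC examples (the method body and the dataset name `customer_loans` are invented for illustration):

```python
import tracdap.rt.api.experimental as trac

def run_model(self, ctx: trac.TracContext):

    # The framework argument selects the type returned by the generic accessor
    loans_pd = ctx.get_table("customer_loans", trac.PANDAS)  # pandas.DataFrame
    loans_pl = ctx.get_table("customer_loans", trac.POLARS)  # polars.DataFrame

    # The pandas-specific accessor is retained, delegating to get_table()
    loans = ctx.get_pandas_table("customer_loans")
```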
```diff
@@ -190,18 +203,29 @@
 
         self.__local_ctx[dataset_name] = updated_view
 
-    def …
+    def put_table(
+            self, dataset_name: str, dataset: _eapi.DATA_API,
+            framework: tp.Optional[_eapi.DataFramework[_eapi.DATA_API]] = None,
+            **framework_args):
+
+        _val.validate_signature(self.put_table, dataset_name, dataset, framework)
 
-        …
+        if framework is None:
+            framework = _data.DataConverter.get_framework(dataset)
+
+        _val.require_package(framework.protocol_name, framework.api_type)
 
         self.__val.check_dataset_valid_identifier(dataset_name)
         self.__val.check_dataset_is_model_output(dataset_name)
-        self.__val.check_provided_dataset_type(dataset, …
+        self.__val.check_provided_dataset_type(dataset, framework.api_type)
+        self.__val.check_data_framework_args(framework, framework_args)
 
         static_schema = self.__get_static_schema(self.__model_def, dataset_name)
         data_view = self.__local_ctx.get(dataset_name)
         part_key = _data.DataPartKey.for_root()
 
+        converter = _data.DataConverter.for_framework(framework)
+
         if data_view is None:
             if static_schema is not None:
                 data_view = _data.DataView.for_trac_schema(static_schema)
```
```diff
@@ -219,13 +243,22 @@
         else:
             schema = data_view.arrow_schema
 
-        # Data conformance is applied inside …
+        # Data conformance is applied automatically inside the converter, if schema != None
+        table = converter.to_internal(dataset, schema)
+        item = _data.DataItem(schema, table)
 
-        …
-        updated_view = _data.DataMapping.add_item_to_view(data_view, part_key, updated_item)
+        updated_view = _data.DataMapping.add_item_to_view(data_view, part_key, item)
 
         self.__local_ctx[dataset_name] = updated_view
 
+    def put_pandas_table(self, dataset_name: str, dataset: "_data.pandas.DataFrame"):
+
+        self.put_table(dataset_name, dataset, _eapi.PANDAS)
+
+    def put_polars_table(self, dataset_name: str, dataset: "_data.polars.DataFrame"):
+
+        self.put_table(dataset_name, dataset, _eapi.POLARS)
+
     def log(self) -> logging.Logger:
 
         _val.validate_signature(self.log)
```
```diff
@@ -260,22 +293,367 @@
         return schema_def
 
 
-class …
-
-    __VALID_IDENTIFIER = re.compile("^[a-zA-Z_]\\w*$",)
-    __RESERVED_IDENTIFIER = re.compile("^(trac_|_)\\w*")
+class TracDataContextImpl(TracContextImpl, _eapi.TracDataContext):
 
     def __init__(
-            self,
-            …
-            …
-            checkout_directory: pathlib.Path):
+            self, model_def: _meta.ModelDefinition, model_class: _api.TracModel.__class__,
+            local_ctx: tp.Dict[str, tp.Any], dynamic_outputs: tp.List[str],
+            storage_map: tp.Dict[str, tp.Union[_eapi.TracFileStorage, _eapi.TracDataStorage]],
+            checkout_directory: pathlib.Path = None):
+
+        super().__init__(model_def, model_class, local_ctx, dynamic_outputs, checkout_directory)
 
-        self.__log = log
         self.__model_def = model_def
         self.__local_ctx = local_ctx
+        self.__dynamic_outputs = dynamic_outputs
+        self.__storage_map = storage_map
+        self.__checkout_directory = checkout_directory
+
+        self.__val = self._TracContextImpl__val  # noqa
+
+    def get_file_storage(self, storage_key: str) -> _eapi.TracFileStorage:
+
+        _val.validate_signature(self.get_file_storage, storage_key)
+
+        self.__val.check_storage_valid_identifier(storage_key)
+        self.__val.check_storage_available(self.__storage_map, storage_key)
+        self.__val.check_storage_type(self.__storage_map, storage_key, _eapi.TracFileStorage)
+
+        return self.__storage_map[storage_key]
+
+    def get_data_storage(
+            self, storage_key: str,
+            framework: _eapi.DataFramework[_eapi.DATA_API],
+            **framework_args) -> _eapi.TracDataStorage[_eapi.DATA_API]:
+
+        _val.validate_signature(self.get_file_storage, storage_key)
+
+        self.__val.check_storage_valid_identifier(storage_key)
+        self.__val.check_storage_available(self.__storage_map, storage_key)
+        self.__val.check_storage_type(self.__storage_map, storage_key, _eapi.TracDataStorage)
+        self.__val.check_data_framework_args(framework, framework_args)
+
+        storage = self.__storage_map[storage_key]
+        converter = _data.DataConverter.for_framework(framework, **framework_args)
+
+        # Create a shallow copy of the storage impl with a converter for the requested data framework
+        # At some point we will need a storage factory class, bc the internal data API can also be different
+        storage = copy.copy(storage)
+        storage._TracDataStorageImpl__converter = converter
+
+        return storage
+
+    def add_data_import(self, dataset_name: str):
+
+        _val.validate_signature(self.add_data_import, dataset_name)
+
+        self.__val.check_dataset_valid_identifier(dataset_name)
+        self.__val.check_dataset_not_defined_in_model(dataset_name)
+        self.__val.check_dataset_not_available_in_context(dataset_name)
+
+        self.__local_ctx[dataset_name] = _data.DataView.create_empty()
+        self.__dynamic_outputs.append(dataset_name)
+
+    def set_source_metadata(self, dataset_name: str, storage_key: str, source_info: tp.Union[_eapi.FileStat, str]):
+
+        _val.validate_signature(self.set_source_metadata, dataset_name, storage_key, source_info)
+
+        self.__val.check_dataset_valid_identifier(dataset_name)
+        self.__val.check_dataset_available_in_context(dataset_name)
+        self.__val.check_storage_valid_identifier(storage_key)
+        self.__val.check_storage_available(self.__storage_map, storage_key)
+
+        storage = self.__storage_map[storage_key]
+
+        if isinstance(storage, _eapi.TracFileStorage):
+            if not isinstance(source_info, _eapi.FileStat):
+                self.__val.report_public_error(f"Expected storage_info to be a FileStat, [{storage_key}] refers to file storage")
+
+        if isinstance(storage, _eapi.TracDataStorage):
+            if not isinstance(source_info, str):
+                self.__val.report_public_error(f"Expected storage_info to be a table name, [{storage_key}] refers to dadta storage")
+
+        pass  # Not implemented yet, only required when imports are sent back to the platform
+
+    def set_attribute(self, dataset_name: str, attribute_name: str, value: tp.Any):
+
+        _val.validate_signature(self.set_attribute, dataset_name, attribute_name, value)
+
+        pass  # Not implemented yet, only required when imports are sent back to the platform
+
+    def set_schema(self, dataset_name: str, schema: _meta.SchemaDefinition):
+
+        _val.validate_signature(self.set_schema, dataset_name, schema)
+
+        # Forward to existing method (these should be swapped round)
+        self.put_schema(dataset_name, schema)
+
+
+class TracFileStorageImpl(_eapi.TracFileStorage):
+
+    def __init__(self, storage_key: str, storage_impl: _storage.IFileStorage, write_access: bool, checkout_directory):
+
+        self.__storage_key = storage_key
+
+        self.__exists = lambda sp: storage_impl.exists(sp)
+        self.__size = lambda sp: storage_impl.size(sp)
+        self.__stat = lambda sp: storage_impl.stat(sp)
+        self.__ls = lambda sp, rec: storage_impl.ls(sp, rec)
+        self.__read_byte_stream = lambda sp: storage_impl.read_byte_stream(sp)
+
+        if write_access:
+            self.__mkdir = lambda sp, rec: storage_impl.mkdir(sp, rec)
+            self.__rm = lambda sp: storage_impl.rm(sp)
+            self.__rmdir = lambda sp: storage_impl.rmdir(sp)
+            self.__write_byte_stream = lambda sp: storage_impl.write_byte_stream(sp)
+        else:
+            self.__mkdir = None
+            self.__rm = None
+            self.__rmdir = None
+            self.__write_byte_stream = None
+
+        self.__log = _util.logger_for_object(self)
+        self.__val = TracStorageValidator(self.__log, checkout_directory, self.__storage_key)
+
+    def get_storage_key(self) -> str:
+
+        _val.validate_signature(self.get_storage_key)
+
+        return self.__storage_key
+
+    def exists(self, storage_path: str) -> bool:
+
+        _val.validate_signature(self.exists, storage_path)
+
+        self.__val.check_operation_available(self.exists, self.__exists)
+        self.__val.check_storage_path_is_valid(storage_path)
+
+        return self.__exists(storage_path)
+
+    def size(self, storage_path: str) -> int:
+
+        _val.validate_signature(self.size, storage_path)
+
+        self.__val.check_operation_available(self.size, self.__size)
+        self.__val.check_storage_path_is_valid(storage_path)
+
+        return self.__size(storage_path)
+
+    def stat(self, storage_path: str) -> _eapi.FileStat:
+
+        _val.validate_signature(self.stat, storage_path)
+
+        self.__val.check_operation_available(self.stat, self.__stat)
+        self.__val.check_storage_path_is_valid(storage_path)
+
+        stat = self.__stat(storage_path)
+        return _eapi.FileStat(**stat.__dict__)
+
+    def ls(self, storage_path: str, recursive: bool = False) -> tp.List[_eapi.FileStat]:
+
+        _val.validate_signature(self.ls, storage_path, recursive)
+
+        self.__val.check_operation_available(self.ls, self.__ls)
+        self.__val.check_storage_path_is_valid(storage_path)
+
+        listing = self.__ls(storage_path, recursive)
+        return list(_eapi.FileStat(**stat.__dict__) for stat in listing)
+
+    def mkdir(self, storage_path: str, recursive: bool = False):
+
+        _val.validate_signature(self.mkdir, storage_path, recursive)
+
+        self.__val.check_operation_available(self.mkdir, self.__mkdir)
+        self.__val.check_storage_path_is_valid(storage_path)
+        self.__val.check_storage_path_is_not_root(storage_path)
+
+        self.__mkdir(storage_path, recursive)
+
+    def rm(self, storage_path: str):
+
+        _val.validate_signature(self.rm, storage_path)
+
+        self.__val.check_operation_available(self.rm, self.__rm)
+        self.__val.check_storage_path_is_valid(storage_path)
+        self.__val.check_storage_path_is_not_root(storage_path)
+
+        self.__rm(storage_path)
+
+    def rmdir(self, storage_path: str):
+
+        _val.validate_signature(self.rmdir, storage_path)
+
+        self.__val.check_operation_available(self.rmdir, self.__rmdir)
+        self.__val.check_storage_path_is_valid(storage_path)
+        self.__val.check_storage_path_is_not_root(storage_path)
+
+        self.__rmdir(storage_path)
+
+    def read_byte_stream(self, storage_path: str) -> tp.ContextManager[tp.BinaryIO]:
+
+        _val.validate_signature(self.read_byte_stream, storage_path)
+
+        self.__val.check_operation_available(self.read_byte_stream, self.__read_byte_stream)
+        self.__val.check_storage_path_is_valid(storage_path)
+
+        return self.__read_byte_stream(storage_path)
+
+    def read_bytes(self, storage_path: str) -> bytes:
+
+        _val.validate_signature(self.read_bytes, storage_path)
+
+        self.__val.check_operation_available(self.read_bytes, self.__read_byte_stream)
+        self.__val.check_storage_path_is_valid(storage_path)
+
+        return super().read_bytes(storage_path)
+
+    def write_byte_stream(self, storage_path: str) -> tp.ContextManager[tp.BinaryIO]:
+
+        _val.validate_signature(self.write_byte_stream, storage_path)
+
+        self.__val.check_operation_available(self.write_byte_stream, self.__write_byte_stream)
+        self.__val.check_storage_path_is_valid(storage_path)
+        self.__val.check_storage_path_is_not_root(storage_path)
+
+        return self.__write_byte_stream(storage_path)
+
+    def write_bytes(self, storage_path: str, data: bytes):
+
+        _val.validate_signature(self.write_bytes, storage_path)
+
+        self.__val.check_operation_available(self.write_bytes, self.__write_byte_stream)
+        self.__val.check_storage_path_is_valid(storage_path)
+        self.__val.check_storage_path_is_not_root(storage_path)
+
+        super().write_bytes(storage_path, data)
+
+
+class TracDataStorageImpl(_eapi.TracDataStorage[_eapi.DATA_API]):
+
+    def __init__(
+            self, storage_key: str, storage_impl: _storage.IDataStorageBase[_data.T_INTERNAL_DATA, _data.T_INTERNAL_SCHEMA],
+            data_converter: _data.DataConverter[_eapi.DATA_API, _data.T_INTERNAL_DATA, _data.T_INTERNAL_SCHEMA],
+            write_access: bool, checkout_directory):
+
+        self.__storage_key = storage_key
+        self.__converter = data_converter
+
+        self.__has_table = lambda tn: storage_impl.has_table(tn)
+        self.__list_tables = lambda: storage_impl.list_tables()
+        self.__read_table = lambda tn: storage_impl.read_table(tn)
+        self.__native_read_query = lambda q, ps: storage_impl.native_read_query(q, **ps)
+
+        if write_access:
+            self.__create_table = lambda tn, s: storage_impl.create_table(tn, s)
+            self.__write_table = lambda tn, ds: storage_impl.write_table(tn, ds)
+        else:
+            self.__create_table = None
+            self.__write_table = None
+
+        self.__log = _util.logger_for_object(self)
+        self.__val = TracStorageValidator(self.__log, checkout_directory, self.__storage_key)
+
+    def has_table(self, table_name: str) -> bool:
+
+        _val.validate_signature(self.has_table, table_name)
+
+        self.__val.check_operation_available(self.has_table, self.__has_table)
+        self.__val.check_table_name_is_valid(table_name)
+        self.__val.check_storage_path_is_valid(table_name)
+
+        try:
+            return self.__has_table(table_name)
+        except _ex.EStorageRequest as e:
+            self.__val.report_public_error(e)
+
+    def list_tables(self) -> tp.List[str]:
+
+        _val.validate_signature(self.list_tables)
+
+        self.__val.check_operation_available(self.list_tables, self.__list_tables)
+
+        try:
+            return self.__list_tables()
+        except _ex.EStorageRequest as e:
+            self.__val.report_public_error(e)
+
+    def create_table(self, table_name: str, schema: _api.SchemaDefinition):
+
+        _val.validate_signature(self.create_table, table_name, schema)
+
+        self.__val.check_operation_available(self.create_table, self.__create_table)
+        self.__val.check_table_name_is_valid(table_name)
+        self.__val.check_storage_path_is_valid(table_name)
+
+        arrow_schema = _data.DataMapping.trac_to_arrow_schema(schema)
+
+        try:
+            self.__create_table(table_name, arrow_schema)
+        except _ex.EStorageRequest as e:
+            self.__val.report_public_error(e)
+
+    def read_table(self, table_name: str) -> _eapi.DATA_API:
+
+        _val.validate_signature(self.read_table, table_name)
+
+        self.__val.check_operation_available(self.read_table, self.__read_table)
+        self.__val.check_table_name_is_valid(table_name)
+        self.__val.check_table_name_not_reserved(table_name)
+
+        try:
+            raw_data = self.__read_table(table_name)
+            return self.__converter.from_internal(raw_data)
+
+        except _ex.EStorageRequest as e:
+            self.__val.report_public_error(e)
+
+    def native_read_query(self, query: str, **parameters) -> _eapi.DATA_API:
+
+        _val.validate_signature(self.native_read_query, query, **parameters)
+
+        self.__val.check_operation_available(self.native_read_query, self.__native_read_query)
+
+        # TODO: validate query and parameters
+        # Some validation is performed by the impl
+
+        try:
+            raw_data = self.__native_read_query(query, **parameters)
+            return self.__converter.from_internal(raw_data)
+
+        except _ex.EStorageRequest as e:
+            self.__val.report_public_error(e)
+
+    def write_table(self, table_name: str, dataset: _eapi.DATA_API):
+
+        _val.validate_signature(self.write_table, table_name, dataset)
+
+        self.__val.check_operation_available(self.read_table, self.__read_table)
+        self.__val.check_table_name_is_valid(table_name)
+        self.__val.check_table_name_not_reserved(table_name)
+        self.__val.check_provided_dataset_type(dataset, self.__converter.framework.api_type)
+
+        try:
+            raw_data = self.__converter.to_internal(dataset)
+            self.__write_table(table_name, raw_data)
+
+        except _ex.EStorageRequest as e:
+            self.__val.report_public_error(e)
+
+
+class TracContextErrorReporter:
+
+    _VALID_IDENTIFIER = re.compile("^[a-zA-Z_]\\w*$",)
+    _RESERVED_IDENTIFIER = re.compile("^(trac_|_)\\w*")
+
+    def __init__(self, log: logging.Logger, checkout_directory: pathlib.Path):
+
+        self.__log = log
         self.__checkout_directory = checkout_directory
 
+    def report_public_error(self, exception: Exception):
+
+        self._report_error(str(exception), exception)
+
     def _report_error(self, message, cause: Exception = None):
 
         full_stack = traceback.extract_stack()
```
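The bulk of this hunk is the new experimental storage surface: `TracDataContextImpl` layers storage access and dynamic data imports over the regular context, while `TracFileStorageImpl` and `TracDataStorageImpl` wrap the internal storage implementations, withholding write operations unless the job grants write access. A sketch of how a data-import model might use these entry points, assuming a storage key `raw_data` is configured for the job (the key, path and dataset name are invented):

```python
import tracdap.rt.api.experimental as trac

def run_model(self, ctx: trac.TracDataContext):

    storage = ctx.get_file_storage("raw_data")

    # FileStat entries describe each item in the listing
    for entry in storage.ls("landing_area", recursive=False):
        ctx.log().info(f"Found [{entry.file_name}], size = [{entry.size}]")

    # Register a dataset that is not declared in the model's static outputs
    ctx.add_data_import("imported_loans")
```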
```diff
@@ -292,12 +670,38 @@ class TracContextValidator:
         else:
             raise _ex.ERuntimeValidation(message)
 
+    @staticmethod
+    def _type_name(type_: type):
+
+        module = type_.__module__
+
+        if module is None or module == str.__class__.__module__ or module == tp.__name__:
+            return _val.type_name(type_, False)
+        else:
+            return _val.type_name(type_, True)
+
+
+class TracContextValidator(TracContextErrorReporter):
+
+    def __init__(
+            self, log: logging.Logger,
+            model_def: _meta.ModelDefinition,
+            local_ctx: tp.Dict[str, tp.Any],
+            dynamic_outputs: tp.List[str],
+            checkout_directory: pathlib.Path):
+
+        super().__init__(log, checkout_directory)
+
+        self.__model_def = model_def
+        self.__local_ctx = local_ctx
+        self.__dynamic_outputs = dynamic_outputs
+
     def check_param_valid_identifier(self, param_name: str):
 
         if param_name is None:
             self._report_error(f"Parameter name is null")
 
-        if not self.…
+        if not self._VALID_IDENTIFIER.match(param_name):
             self._report_error(f"Parameter name {param_name} is not a valid identifier")
 
     def check_param_defined_in_model(self, param_name: str):
```
```diff
@@ -315,9 +719,17 @@ class TracContextValidator:
         if dataset_name is None:
             self._report_error(f"Dataset name is null")
 
-        if not self.…
+        if not self._VALID_IDENTIFIER.match(dataset_name):
             self._report_error(f"Dataset name {dataset_name} is not a valid identifier")
 
+    def check_dataset_not_defined_in_model(self, dataset_name: str):
+
+        if dataset_name in self.__model_def.inputs or dataset_name in self.__model_def.outputs:
+            self._report_error(f"Dataset {dataset_name} is already defined in the model")
+
+        if dataset_name in self.__model_def.parameters:
+            self._report_error(f"Dataset name {dataset_name} is already in use as a model parameter")
+
     def check_dataset_defined_in_model(self, dataset_name: str):
 
         if dataset_name not in self.__model_def.inputs and dataset_name not in self.__model_def.outputs:
```
```diff
@@ -325,17 +737,18 @@ class TracContextValidator:
 
     def check_dataset_is_model_output(self, dataset_name: str):
 
-        if dataset_name not in self.__model_def.outputs:
+        if dataset_name not in self.__model_def.outputs and dataset_name not in self.__dynamic_outputs:
             self._report_error(f"Dataset {dataset_name} is not defined as a model output")
 
     def check_dataset_is_dynamic_output(self, dataset_name: str):
 
         model_output: _meta.ModelOutputSchema = self.__model_def.outputs.get(dataset_name)
+        dynamic_output = dataset_name in self.__dynamic_outputs
 
-        if model_output is None:
+        if model_output is None and not dynamic_output:
             self._report_error(f"Dataset {dataset_name} is not defined as a model output")
 
-        if not model_output.dynamic:
+        if model_output and not model_output.dynamic:
             self._report_error(f"Model output {dataset_name} is not a dynamic output")
 
     def check_dataset_available_in_context(self, item_name: str):
```
```diff
@@ -343,6 +756,11 @@ class TracContextValidator:
         if item_name not in self.__local_ctx:
             self._report_error(f"Dataset {item_name} is not available in the current context")
 
+    def check_dataset_not_available_in_context(self, item_name: str):
+
+        if item_name in self.__local_ctx:
+            self._report_error(f"Dataset {item_name} already exists in the current context")
+
     def check_dataset_schema_defined(self, dataset_name: str, data_view: _data.DataView):
 
         schema = data_view.trac_schema if data_view is not None else None
```
```diff
@@ -415,12 +833,114 @@ class TracContextValidator:
             f"The object referenced by [{item_name}] in the current context has the wrong type" +
             f" (expected {expected_type_name}, got {actual_type_name})")
 
-    …
-    def _type_name(type_: type):
+    def check_data_framework_args(self, framework: _eapi.DataFramework, framework_args: tp.Dict[str, tp.Any]):
 
-        …
+        expected_args = _data.DataConverter.get_framework_args(framework)
+        unexpected_args = list(filter(lambda arg: arg not in expected_args, framework_args.keys()))
+
+        if any(unexpected_args):
+            unknown_args = ", ".join(unexpected_args)
+            self._report_error(f"Using [{framework}], some arguments were not recognized: [{unknown_args}]")
+
+        for arg_name, arg_type in expected_args.items():
+
+            arg_value = framework_args.get(arg_name)
+
+            if _val.check_type(arg_type, arg_value):
+                continue
+
+            if arg_value is None:
+                self._report_error(f"Using [{framework}], required argument [{arg_name}] is missing")
+
+            else:
+                expected_type_name = self._type_name(arg_type)
+                actual_type_name = self._type_name(type(arg_value))
+
+                self._report_error(
+                    f"Using [{framework}], argument [{arg_name}] has the wrong type" +
+                    f" (expected {expected_type_name}, got {actual_type_name})")
+
+    def check_storage_valid_identifier(self, storage_key):
+
+        if storage_key is None:
+            self._report_error(f"Storage key is null")
+
+        if not self._VALID_IDENTIFIER.match(storage_key):
+            self._report_error(f"Storage key {storage_key} is not a valid identifier")
+
+    def check_storage_available(self, storage_map: tp.Dict, storage_key: str):
+
+        storage_instance = storage_map.get(storage_key)
+
+        if storage_instance is None:
+            self._report_error(f"Storage not available for storage key [{storage_key}]")
+
+    def check_storage_type(
+            self, storage_map: tp.Dict, storage_key: str,
+            storage_type: tp.Union[_eapi.TracFileStorage.__class__]):
+
+        storage_instance = storage_map.get(storage_key)
+
+        if not isinstance(storage_instance, storage_type):
+            if storage_type == _eapi.TracFileStorage:
+                self._report_error(f"Storage key [{storage_key}] refers to data storage, not file storage")
+            else:
+                self._report_error(f"Storage key [{storage_key}] refers to file storage, not data storage")
 
-        if module is None or module == str.__class__.__module__:
-            return type_.__qualname__
 
-        …
+class TracStorageValidator(TracContextErrorReporter):
+
+    def __init__(self, log, checkout_directory, storage_key):
+        super().__init__(log, checkout_directory)
+        self.__storage_key = storage_key
+
+    def check_operation_available(self, public_func: tp.Callable, impl_func: tp.Callable):
+
+        if impl_func is None:
+            self._report_error(f"Operation [{public_func.__name__}] is not available for storage [{self.__storage_key}]")
+
+    def check_storage_path_is_valid(self, storage_path: str):
+
+        if _val.StorageValidator.storage_path_is_empty(storage_path):
+            self._report_error(f"Storage path is None or empty")
+
+        if _val.StorageValidator.storage_path_invalid(storage_path):
+            self._report_error(f"Storage path [{storage_path}] contains invalid characters")
+
+        if _val.StorageValidator.storage_path_not_relative(storage_path):
+            self._report_error(f"Storage path [{storage_path}] is not a relative path")
+
+        if _val.StorageValidator.storage_path_outside_root(storage_path):
+            self._report_error(f"Storage path [{storage_path}] is outside the storage root")
+
+    def check_storage_path_is_not_root(self, storage_path: str):
+
+        if _val.StorageValidator.storage_path_is_empty(storage_path):
+            self._report_error(f"Storage path [{storage_path}] is not allowed")
+
+    def check_table_name_is_valid(self, table_name: str):
+
+        if table_name is None:
+            self._report_error(f"Table name is null")
+
+        if not self._VALID_IDENTIFIER.match(table_name):
+            self._report_error(f"Table name {table_name} is not a valid identifier")
+
+    def check_table_name_not_reserved(self, table_name: str):
+
+        if self._RESERVED_IDENTIFIER.match(table_name):
+            self._report_error(f"Table name {table_name} is a reserved identifier")
+
+    def check_provided_dataset_type(self, dataset: tp.Any, expected_type: type):
+
+        if dataset is None:
+            self._report_error(f"Provided dataset is null")
+
+        if not isinstance(dataset, expected_type):
+
+            expected_type_name = self._type_name(expected_type)
+            actual_type_name = self._type_name(type(dataset))
+
+            self._report_error(
+                f"Provided dataset is the wrong type" +
+                f" (expected {expected_type_name}, got {actual_type_name})")
```