tracdap-runtime 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their public registries.
Files changed (41)
  1. tracdap/rt/_exec/context.py +556 -36
  2. tracdap/rt/_exec/dev_mode.py +320 -198
  3. tracdap/rt/_exec/engine.py +331 -62
  4. tracdap/rt/_exec/functions.py +151 -22
  5. tracdap/rt/_exec/graph.py +47 -13
  6. tracdap/rt/_exec/graph_builder.py +383 -175
  7. tracdap/rt/_exec/runtime.py +7 -5
  8. tracdap/rt/_impl/config_parser.py +11 -4
  9. tracdap/rt/_impl/data.py +329 -152
  10. tracdap/rt/_impl/ext/__init__.py +13 -0
  11. tracdap/rt/_impl/ext/sql.py +116 -0
  12. tracdap/rt/_impl/ext/storage.py +57 -0
  13. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +82 -30
  14. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +155 -2
  15. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +12 -10
  16. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -2
  17. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +29 -0
  18. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +16 -0
  19. tracdap/rt/_impl/models.py +8 -0
  20. tracdap/rt/_impl/static_api.py +29 -0
  21. tracdap/rt/_impl/storage.py +39 -27
  22. tracdap/rt/_impl/util.py +10 -0
  23. tracdap/rt/_impl/validation.py +140 -18
  24. tracdap/rt/_plugins/repo_git.py +1 -1
  25. tracdap/rt/_plugins/storage_sql.py +417 -0
  26. tracdap/rt/_plugins/storage_sql_dialects.py +117 -0
  27. tracdap/rt/_version.py +1 -1
  28. tracdap/rt/api/experimental.py +267 -0
  29. tracdap/rt/api/hook.py +14 -0
  30. tracdap/rt/api/model_api.py +48 -6
  31. tracdap/rt/config/__init__.py +2 -2
  32. tracdap/rt/config/common.py +6 -0
  33. tracdap/rt/metadata/__init__.py +29 -20
  34. tracdap/rt/metadata/job.py +99 -0
  35. tracdap/rt/metadata/model.py +18 -0
  36. tracdap/rt/metadata/resource.py +24 -0
  37. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/METADATA +5 -1
  38. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/RECORD +41 -32
  39. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/WHEEL +1 -1
  40. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/LICENSE +0 -0
  41. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/top_level.txt +0 -0
tracdap/rt/_exec/context.py

@@ -19,13 +19,13 @@ import typing as tp
  import re
  import traceback

- import pandas as pd
-
  import tracdap.rt.api as _api
+ import tracdap.rt.api.experimental as _eapi
  import tracdap.rt.metadata as _meta
  import tracdap.rt.exceptions as _ex
  import tracdap.rt._impl.type_system as _types # noqa
  import tracdap.rt._impl.data as _data # noqa
+ import tracdap.rt._impl.storage as _storage # noqa
  import tracdap.rt._impl.util as _util # noqa
  import tracdap.rt._impl.validation as _val # noqa

@@ -55,12 +55,11 @@ class TracContextImpl(_api.TracContext):
      Output views will contain schemas but no data.
      """

-     __DEFAULT_TEMPORAL_OBJECTS = False
-
      def __init__(self,
                   model_def: _meta.ModelDefinition,
                   model_class: _api.TracModel.__class__,
                   local_ctx: tp.Dict[str, tp.Any],
+                  dynamic_outputs: tp.List[str] = None,
                   checkout_directory: pathlib.Path = None):

          self.__ctx_log = _util.logger_for_object(self)
@@ -68,12 +67,14 @@ class TracContextImpl(_api.TracContext):

          self.__model_def = model_def
          self.__model_class = model_class
-         self.__local_ctx = local_ctx or {}
+         self.__local_ctx = local_ctx if local_ctx is not None else {}
+         self.__dynamic_outputs = dynamic_outputs if dynamic_outputs is not None else []

          self.__val = TracContextValidator(
              self.__ctx_log,
              self.__model_def,
              self.__local_ctx,
+             self.__dynamic_outputs,
              checkout_directory)

      def get_parameter(self, parameter_name: str) -> tp.Any:
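
The switch from `local_ctx or {}` to an explicit `is not None` test is behavioral, not just stylistic: an empty dict is falsy, so the old form silently replaced a caller-supplied (but still empty) context with a brand-new dict, and anything the model wrote through the context never reached the object the engine kept. A minimal, self-contained illustration:

    shared_ctx = {}  # engine-owned context, empty when the model starts

    old_style = shared_ctx or {}                              # falsy -> brand-new dict
    new_style = shared_ctx if shared_ctx is not None else {}  # same object as shared_ctx

    old_style["output"] = "lost"     # never reaches shared_ctx
    new_style["output"] = "visible"  # shared_ctx sees this write

    print(shared_ctx)  # {'output': 'visible'}
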
@@ -131,18 +132,22 @@ class TracContextImpl(_api.TracContext):
          else:
              return copy.deepcopy(data_view.trac_schema)

-     def get_pandas_table(self, dataset_name: str, use_temporal_objects: tp.Optional[bool] = None) -> pd.DataFrame:
+     def get_table(self, dataset_name: str, framework: _eapi.DataFramework[_eapi.DATA_API], **framework_args) -> _eapi.DATA_API:

-         _val.validate_signature(self.get_pandas_table, dataset_name, use_temporal_objects)
+         _val.validate_signature(self.get_table, dataset_name, framework)
+         _val.require_package(framework.protocol_name, framework.api_type)

          self.__val.check_dataset_valid_identifier(dataset_name)
          self.__val.check_dataset_defined_in_model(dataset_name)
          self.__val.check_dataset_available_in_context(dataset_name)
+         self.__val.check_data_framework_args(framework, framework_args)

          static_schema = self.__get_static_schema(self.__model_def, dataset_name)
          data_view = self.__local_ctx.get(dataset_name)
          part_key = _data.DataPartKey.for_root()

+         converter = _data.DataConverter.for_framework(framework, **framework_args)
+
          self.__val.check_context_object_type(dataset_name, data_view, _data.DataView)
          self.__val.check_dataset_schema_defined(dataset_name, data_view)
          self.__val.check_dataset_part_present(dataset_name, data_view, part_key)
@@ -155,10 +160,18 @@ class TracContextImpl(_api.TracContext):
          else:
              schema = data_view.arrow_schema

-         if use_temporal_objects is None:
-             use_temporal_objects = self.__DEFAULT_TEMPORAL_OBJECTS
+         table = _data.DataMapping.view_to_arrow(data_view, part_key)
+
+         # Data conformance is applied automatically inside the converter, if schema != None
+         return converter.from_internal(table, schema)

-         return _data.DataMapping.view_to_pandas(data_view, part_key, schema, use_temporal_objects)
+     def get_pandas_table(self, dataset_name: str, use_temporal_objects: tp.Optional[bool] = None) -> "_data.pandas.DataFrame":
+
+         return self.get_table(dataset_name, _eapi.PANDAS, use_temporal_objects=use_temporal_objects)
+
+     def get_polars_table(self, dataset_name: str) -> "_data.polars.DataFrame":
+
+         return self.get_table(dataset_name, _eapi.POLARS)

      def put_schema(self, dataset_name: str, schema: _meta.SchemaDefinition):

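Taken together these changes replace the pandas-only read path with a framework-neutral one: `get_table` resolves a converter for the requested framework, and `get_pandas_table` / `get_polars_table` become thin wrappers over it. A sketch of how a model might use the new entry points, assuming `get_table` is exposed on the model-facing context as the implementation above suggests (the dataset name `customer_data` is hypothetical):

    import tracdap.rt.api as trac
    import tracdap.rt.api.experimental as eapi

    class ReportingModel(trac.TracModel):

        # define_parameters / define_inputs / define_outputs omitted for brevity

        def run_model(self, ctx: trac.TracContext):

            # Generic form and the pandas wrapper are equivalent
            customers = ctx.get_table("customer_data", eapi.PANDAS)
            customers = ctx.get_pandas_table("customer_data")

            # The same dataset as a polars DataFrame, if polars is installed
            customers_pl = ctx.get_table("customer_data", eapi.POLARS)
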
@@ -190,18 +203,29 @@ class TracContextImpl(_api.TracContext):

          self.__local_ctx[dataset_name] = updated_view

-     def put_pandas_table(self, dataset_name: str, dataset: pd.DataFrame):
+     def put_table(
+             self, dataset_name: str, dataset: _eapi.DATA_API,
+             framework: tp.Optional[_eapi.DataFramework[_eapi.DATA_API]] = None,
+             **framework_args):
+
+         _val.validate_signature(self.put_table, dataset_name, dataset, framework)

-         _val.validate_signature(self.put_pandas_table, dataset_name, dataset)
+         if framework is None:
+             framework = _data.DataConverter.get_framework(dataset)
+
+         _val.require_package(framework.protocol_name, framework.api_type)

          self.__val.check_dataset_valid_identifier(dataset_name)
          self.__val.check_dataset_is_model_output(dataset_name)
-         self.__val.check_provided_dataset_type(dataset, pd.DataFrame)
+         self.__val.check_provided_dataset_type(dataset, framework.api_type)
+         self.__val.check_data_framework_args(framework, framework_args)

          static_schema = self.__get_static_schema(self.__model_def, dataset_name)
          data_view = self.__local_ctx.get(dataset_name)
          part_key = _data.DataPartKey.for_root()

+         converter = _data.DataConverter.for_framework(framework)
+
          if data_view is None:
              if static_schema is not None:
                  data_view = _data.DataView.for_trac_schema(static_schema)
@@ -219,13 +243,22 @@ class TracContextImpl(_api.TracContext):
          else:
              schema = data_view.arrow_schema

-         # Data conformance is applied inside these conversion functions
+         # Data conformance is applied automatically inside the converter, if schema != None
+         table = converter.to_internal(dataset, schema)
+         item = _data.DataItem(schema, table)

-         updated_item = _data.DataMapping.pandas_to_item(dataset, schema)
-         updated_view = _data.DataMapping.add_item_to_view(data_view, part_key, updated_item)
+         updated_view = _data.DataMapping.add_item_to_view(data_view, part_key, item)

          self.__local_ctx[dataset_name] = updated_view

+     def put_pandas_table(self, dataset_name: str, dataset: "_data.pandas.DataFrame"):
+
+         self.put_table(dataset_name, dataset, _eapi.PANDAS)
+
+     def put_polars_table(self, dataset_name: str, dataset: "_data.polars.DataFrame"):
+
+         self.put_table(dataset_name, dataset, _eapi.POLARS)
+
      def log(self) -> logging.Logger:

          _val.validate_signature(self.log)
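
On the write side `put_table` mirrors this: the framework can be given explicitly or inferred from the dataset object via `DataConverter.get_framework`, and `put_pandas_table` / `put_polars_table` are now wrappers. A hedged sketch, inside the same `run_model` as above (the output name `profit_by_region` is hypothetical):

    import pandas as pd

    profit = pd.DataFrame({
        "region": ["UK", "US"],
        "gross_profit": [1.5e6, 2.3e6]})

    # Framework inferred from the object type (pandas here)
    ctx.put_table("profit_by_region", profit)

    # Or stated explicitly, equivalent to ctx.put_pandas_table(...):
    # ctx.put_table("profit_by_region", profit, eapi.PANDAS)
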
@@ -260,22 +293,367 @@ class TracContextImpl(_api.TracContext):
          return schema_def


- class TracContextValidator:
-
-     __VALID_IDENTIFIER = re.compile("^[a-zA-Z_]\\w*$",)
-     __RESERVED_IDENTIFIER = re.compile("^(trac_|_)\\w*")
+ class TracDataContextImpl(TracContextImpl, _eapi.TracDataContext):

      def __init__(
-             self, log: logging.Logger,
-             model_def: _meta.ModelDefinition,
-             local_ctx: tp.Dict[str, tp.Any],
-             checkout_directory: pathlib.Path):
+             self, model_def: _meta.ModelDefinition, model_class: _api.TracModel.__class__,
+             local_ctx: tp.Dict[str, tp.Any], dynamic_outputs: tp.List[str],
+             storage_map: tp.Dict[str, tp.Union[_eapi.TracFileStorage, _eapi.TracDataStorage]],
+             checkout_directory: pathlib.Path = None):
+
+         super().__init__(model_def, model_class, local_ctx, dynamic_outputs, checkout_directory)

-         self.__log = log
          self.__model_def = model_def
          self.__local_ctx = local_ctx
+         self.__dynamic_outputs = dynamic_outputs
+         self.__storage_map = storage_map
+         self.__checkout_directory = checkout_directory
+
+         self.__val = self._TracContextImpl__val  # noqa
+
+     def get_file_storage(self, storage_key: str) -> _eapi.TracFileStorage:
+
+         _val.validate_signature(self.get_file_storage, storage_key)
+
+         self.__val.check_storage_valid_identifier(storage_key)
+         self.__val.check_storage_available(self.__storage_map, storage_key)
+         self.__val.check_storage_type(self.__storage_map, storage_key, _eapi.TracFileStorage)
+
+         return self.__storage_map[storage_key]
+
+     def get_data_storage(
+             self, storage_key: str,
+             framework: _eapi.DataFramework[_eapi.DATA_API],
+             **framework_args) -> _eapi.TracDataStorage[_eapi.DATA_API]:
+
+         _val.validate_signature(self.get_data_storage, storage_key)
+
+         self.__val.check_storage_valid_identifier(storage_key)
+         self.__val.check_storage_available(self.__storage_map, storage_key)
+         self.__val.check_storage_type(self.__storage_map, storage_key, _eapi.TracDataStorage)
+         self.__val.check_data_framework_args(framework, framework_args)
+
+         storage = self.__storage_map[storage_key]
+         converter = _data.DataConverter.for_framework(framework, **framework_args)
+
+         # Create a shallow copy of the storage impl with a converter for the requested data framework
+         # At some point we will need a storage factory class, because the internal data API can also be different
+         storage = copy.copy(storage)
+         storage._TracDataStorageImpl__converter = converter
+
+         return storage
+
+     def add_data_import(self, dataset_name: str):
+
+         _val.validate_signature(self.add_data_import, dataset_name)
+
+         self.__val.check_dataset_valid_identifier(dataset_name)
+         self.__val.check_dataset_not_defined_in_model(dataset_name)
+         self.__val.check_dataset_not_available_in_context(dataset_name)
+
+         self.__local_ctx[dataset_name] = _data.DataView.create_empty()
+         self.__dynamic_outputs.append(dataset_name)
+
+     def set_source_metadata(self, dataset_name: str, storage_key: str, source_info: tp.Union[_eapi.FileStat, str]):
+
+         _val.validate_signature(self.set_source_metadata, dataset_name, storage_key, source_info)
+
+         self.__val.check_dataset_valid_identifier(dataset_name)
+         self.__val.check_dataset_available_in_context(dataset_name)
+         self.__val.check_storage_valid_identifier(storage_key)
+         self.__val.check_storage_available(self.__storage_map, storage_key)
+
+         storage = self.__storage_map[storage_key]
+
+         if isinstance(storage, _eapi.TracFileStorage):
+             if not isinstance(source_info, _eapi.FileStat):
+                 self.__val.report_public_error(f"Expected source_info to be a FileStat, [{storage_key}] refers to file storage")
+
+         if isinstance(storage, _eapi.TracDataStorage):
+             if not isinstance(source_info, str):
+                 self.__val.report_public_error(f"Expected source_info to be a table name, [{storage_key}] refers to data storage")
+
+         pass  # Not implemented yet, only required when imports are sent back to the platform
+
+     def set_attribute(self, dataset_name: str, attribute_name: str, value: tp.Any):
+
+         _val.validate_signature(self.set_attribute, dataset_name, attribute_name, value)
+
+         pass  # Not implemented yet, only required when imports are sent back to the platform
+
+     def set_schema(self, dataset_name: str, schema: _meta.SchemaDefinition):
+
+         _val.validate_signature(self.set_schema, dataset_name, schema)
+
+         # Forward to existing method (these should be swapped round)
+         self.put_schema(dataset_name, schema)
+
+
+ class TracFileStorageImpl(_eapi.TracFileStorage):
+
+     def __init__(self, storage_key: str, storage_impl: _storage.IFileStorage, write_access: bool, checkout_directory):
+
+         self.__storage_key = storage_key
+
+         self.__exists = lambda sp: storage_impl.exists(sp)
+         self.__size = lambda sp: storage_impl.size(sp)
+         self.__stat = lambda sp: storage_impl.stat(sp)
+         self.__ls = lambda sp, rec: storage_impl.ls(sp, rec)
+         self.__read_byte_stream = lambda sp: storage_impl.read_byte_stream(sp)
+
+         if write_access:
+             self.__mkdir = lambda sp, rec: storage_impl.mkdir(sp, rec)
+             self.__rm = lambda sp: storage_impl.rm(sp)
+             self.__rmdir = lambda sp: storage_impl.rmdir(sp)
+             self.__write_byte_stream = lambda sp: storage_impl.write_byte_stream(sp)
+         else:
+             self.__mkdir = None
+             self.__rm = None
+             self.__rmdir = None
+             self.__write_byte_stream = None
+
+         self.__log = _util.logger_for_object(self)
+         self.__val = TracStorageValidator(self.__log, checkout_directory, self.__storage_key)
+
+     def get_storage_key(self) -> str:
+
+         _val.validate_signature(self.get_storage_key)
+
+         return self.__storage_key
+
+     def exists(self, storage_path: str) -> bool:
+
+         _val.validate_signature(self.exists, storage_path)
+
+         self.__val.check_operation_available(self.exists, self.__exists)
+         self.__val.check_storage_path_is_valid(storage_path)
+
+         return self.__exists(storage_path)
+
+     def size(self, storage_path: str) -> int:
+
+         _val.validate_signature(self.size, storage_path)
+
+         self.__val.check_operation_available(self.size, self.__size)
+         self.__val.check_storage_path_is_valid(storage_path)
+
+         return self.__size(storage_path)
+
+     def stat(self, storage_path: str) -> _eapi.FileStat:
+
+         _val.validate_signature(self.stat, storage_path)
+
+         self.__val.check_operation_available(self.stat, self.__stat)
+         self.__val.check_storage_path_is_valid(storage_path)
+
+         stat = self.__stat(storage_path)
+         return _eapi.FileStat(**stat.__dict__)
+
+     def ls(self, storage_path: str, recursive: bool = False) -> tp.List[_eapi.FileStat]:
+
+         _val.validate_signature(self.ls, storage_path, recursive)
+
+         self.__val.check_operation_available(self.ls, self.__ls)
+         self.__val.check_storage_path_is_valid(storage_path)
+
+         listing = self.__ls(storage_path, recursive)
+         return list(_eapi.FileStat(**stat.__dict__) for stat in listing)
+
+     def mkdir(self, storage_path: str, recursive: bool = False):
+
+         _val.validate_signature(self.mkdir, storage_path, recursive)
+
+         self.__val.check_operation_available(self.mkdir, self.__mkdir)
+         self.__val.check_storage_path_is_valid(storage_path)
+         self.__val.check_storage_path_is_not_root(storage_path)
+
+         self.__mkdir(storage_path, recursive)
+
+     def rm(self, storage_path: str):
+
+         _val.validate_signature(self.rm, storage_path)
+
+         self.__val.check_operation_available(self.rm, self.__rm)
+         self.__val.check_storage_path_is_valid(storage_path)
+         self.__val.check_storage_path_is_not_root(storage_path)
+
+         self.__rm(storage_path)
+
+     def rmdir(self, storage_path: str):
+
+         _val.validate_signature(self.rmdir, storage_path)
+
+         self.__val.check_operation_available(self.rmdir, self.__rmdir)
+         self.__val.check_storage_path_is_valid(storage_path)
+         self.__val.check_storage_path_is_not_root(storage_path)
+
+         self.__rmdir(storage_path)
+
+     def read_byte_stream(self, storage_path: str) -> tp.ContextManager[tp.BinaryIO]:
+
+         _val.validate_signature(self.read_byte_stream, storage_path)
+
+         self.__val.check_operation_available(self.read_byte_stream, self.__read_byte_stream)
+         self.__val.check_storage_path_is_valid(storage_path)
+
+         return self.__read_byte_stream(storage_path)
+
+     def read_bytes(self, storage_path: str) -> bytes:
+
+         _val.validate_signature(self.read_bytes, storage_path)
+
+         self.__val.check_operation_available(self.read_bytes, self.__read_byte_stream)
+         self.__val.check_storage_path_is_valid(storage_path)
+
+         return super().read_bytes(storage_path)
+
+     def write_byte_stream(self, storage_path: str) -> tp.ContextManager[tp.BinaryIO]:
+
+         _val.validate_signature(self.write_byte_stream, storage_path)
+
+         self.__val.check_operation_available(self.write_byte_stream, self.__write_byte_stream)
+         self.__val.check_storage_path_is_valid(storage_path)
+         self.__val.check_storage_path_is_not_root(storage_path)
+
+         return self.__write_byte_stream(storage_path)
+
+     def write_bytes(self, storage_path: str, data: bytes):
+
+         _val.validate_signature(self.write_bytes, storage_path)
+
+         self.__val.check_operation_available(self.write_bytes, self.__write_byte_stream)
+         self.__val.check_storage_path_is_valid(storage_path)
+         self.__val.check_storage_path_is_not_root(storage_path)
+
+         super().write_bytes(storage_path, data)
+
+
+ class TracDataStorageImpl(_eapi.TracDataStorage[_eapi.DATA_API]):
+
+     def __init__(
+             self, storage_key: str, storage_impl: _storage.IDataStorageBase[_data.T_INTERNAL_DATA, _data.T_INTERNAL_SCHEMA],
+             data_converter: _data.DataConverter[_eapi.DATA_API, _data.T_INTERNAL_DATA, _data.T_INTERNAL_SCHEMA],
+             write_access: bool, checkout_directory):
+
+         self.__storage_key = storage_key
+         self.__converter = data_converter
+
+         self.__has_table = lambda tn: storage_impl.has_table(tn)
+         self.__list_tables = lambda: storage_impl.list_tables()
+         self.__read_table = lambda tn: storage_impl.read_table(tn)
+         self.__native_read_query = lambda q, ps: storage_impl.native_read_query(q, **ps)
+
+         if write_access:
+             self.__create_table = lambda tn, s: storage_impl.create_table(tn, s)
+             self.__write_table = lambda tn, ds: storage_impl.write_table(tn, ds)
+         else:
+             self.__create_table = None
+             self.__write_table = None
+
+         self.__log = _util.logger_for_object(self)
+         self.__val = TracStorageValidator(self.__log, checkout_directory, self.__storage_key)
+
+     def has_table(self, table_name: str) -> bool:
+
+         _val.validate_signature(self.has_table, table_name)
+
+         self.__val.check_operation_available(self.has_table, self.__has_table)
+         self.__val.check_table_name_is_valid(table_name)
+         self.__val.check_storage_path_is_valid(table_name)
+
+         try:
+             return self.__has_table(table_name)
+         except _ex.EStorageRequest as e:
+             self.__val.report_public_error(e)
+
+     def list_tables(self) -> tp.List[str]:
+
+         _val.validate_signature(self.list_tables)
+
+         self.__val.check_operation_available(self.list_tables, self.__list_tables)
+
+         try:
+             return self.__list_tables()
+         except _ex.EStorageRequest as e:
+             self.__val.report_public_error(e)
+
+     def create_table(self, table_name: str, schema: _api.SchemaDefinition):
+
+         _val.validate_signature(self.create_table, table_name, schema)
+
+         self.__val.check_operation_available(self.create_table, self.__create_table)
+         self.__val.check_table_name_is_valid(table_name)
+         self.__val.check_storage_path_is_valid(table_name)
+
+         arrow_schema = _data.DataMapping.trac_to_arrow_schema(schema)
+
+         try:
+             self.__create_table(table_name, arrow_schema)
+         except _ex.EStorageRequest as e:
+             self.__val.report_public_error(e)
+
+     def read_table(self, table_name: str) -> _eapi.DATA_API:
+
+         _val.validate_signature(self.read_table, table_name)
+
+         self.__val.check_operation_available(self.read_table, self.__read_table)
+         self.__val.check_table_name_is_valid(table_name)
+         self.__val.check_table_name_not_reserved(table_name)
+
+         try:
+             raw_data = self.__read_table(table_name)
+             return self.__converter.from_internal(raw_data)
+
+         except _ex.EStorageRequest as e:
+             self.__val.report_public_error(e)
+
+     def native_read_query(self, query: str, **parameters) -> _eapi.DATA_API:
+
+         _val.validate_signature(self.native_read_query, query, **parameters)
+
+         self.__val.check_operation_available(self.native_read_query, self.__native_read_query)
+
+         # TODO: validate query and parameters
+         # Some validation is performed by the impl
+
+         try:
+             raw_data = self.__native_read_query(query, **parameters)
+             return self.__converter.from_internal(raw_data)
+
+         except _ex.EStorageRequest as e:
+             self.__val.report_public_error(e)
+
+     def write_table(self, table_name: str, dataset: _eapi.DATA_API):
+
+         _val.validate_signature(self.write_table, table_name, dataset)
+
+         self.__val.check_operation_available(self.write_table, self.__write_table)
+         self.__val.check_table_name_is_valid(table_name)
+         self.__val.check_table_name_not_reserved(table_name)
+         self.__val.check_provided_dataset_type(dataset, self.__converter.framework.api_type)
+
+         try:
+             raw_data = self.__converter.to_internal(dataset)
+             self.__write_table(table_name, raw_data)
+
+         except _ex.EStorageRequest as e:
+             self.__val.report_public_error(e)
+
+
+ class TracContextErrorReporter:
+
+     _VALID_IDENTIFIER = re.compile("^[a-zA-Z_]\\w*$",)
+     _RESERVED_IDENTIFIER = re.compile("^(trac_|_)\\w*")
+
+     def __init__(self, log: logging.Logger, checkout_directory: pathlib.Path):
+
+         self.__log = log
          self.__checkout_directory = checkout_directory

+     def report_public_error(self, exception: Exception):
+
+         self._report_error(str(exception), exception)
+
      def _report_error(self, message, cause: Exception = None):

          full_stack = traceback.extract_stack()
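
The bulk of the new surface area is here: `TracDataContextImpl` extends the model context for data import jobs, handing out restricted `TracFileStorage` / `TracDataStorage` wrappers, while `add_data_import` lets a model register datasets it discovers at runtime as dynamic outputs. A rough sketch of an import model built on these pieces; the storage key, dataset name and the helpers `decode_csv` / `build_schema` are all hypothetical:

    import tracdap.rt.api.experimental as eapi

    def run_model(self, ctx: eapi.TracDataContext):

        storage = ctx.get_file_storage("raw_data_storage")

        if storage.exists("landing/customers.csv"):

            raw_bytes = storage.read_bytes("landing/customers.csv")
            customers = decode_csv(raw_bytes)  # hypothetical decoding step

            # Register a dynamic output, give it a schema, then write the data
            ctx.add_data_import("customer_data")
            ctx.set_schema("customer_data", build_schema())  # hypothetical schema helper
            ctx.put_table("customer_data", customers)

For SQL-backed storage the equivalent handle comes from `get_data_storage`, which binds the chosen data framework to the storage wrapper:

    sales_db = ctx.get_data_storage("sales_db", eapi.PANDAS)  # hypothetical storage key

    if sales_db.has_table("transactions"):
        transactions = sales_db.read_table("transactions")  # returns a pandas DataFrame
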
@@ -292,12 +670,38 @@ class TracContextValidator:
          else:
              raise _ex.ERuntimeValidation(message)

+     @staticmethod
+     def _type_name(type_: type):
+
+         module = type_.__module__
+
+         if module is None or module == str.__class__.__module__ or module == tp.__name__:
+             return _val.type_name(type_, False)
+         else:
+             return _val.type_name(type_, True)
+
+
+ class TracContextValidator(TracContextErrorReporter):
+
+     def __init__(
+             self, log: logging.Logger,
+             model_def: _meta.ModelDefinition,
+             local_ctx: tp.Dict[str, tp.Any],
+             dynamic_outputs: tp.List[str],
+             checkout_directory: pathlib.Path):
+
+         super().__init__(log, checkout_directory)
+
+         self.__model_def = model_def
+         self.__local_ctx = local_ctx
+         self.__dynamic_outputs = dynamic_outputs
+
      def check_param_valid_identifier(self, param_name: str):

          if param_name is None:
              self._report_error(f"Parameter name is null")

-         if not self.__VALID_IDENTIFIER.match(param_name):
+         if not self._VALID_IDENTIFIER.match(param_name):
              self._report_error(f"Parameter name {param_name} is not a valid identifier")

      def check_param_defined_in_model(self, param_name: str):
@@ -315,9 +719,17 @@ class TracContextValidator:
          if dataset_name is None:
              self._report_error(f"Dataset name is null")

-         if not self.__VALID_IDENTIFIER.match(dataset_name):
+         if not self._VALID_IDENTIFIER.match(dataset_name):
              self._report_error(f"Dataset name {dataset_name} is not a valid identifier")

+     def check_dataset_not_defined_in_model(self, dataset_name: str):
+
+         if dataset_name in self.__model_def.inputs or dataset_name in self.__model_def.outputs:
+             self._report_error(f"Dataset {dataset_name} is already defined in the model")
+
+         if dataset_name in self.__model_def.parameters:
+             self._report_error(f"Dataset name {dataset_name} is already in use as a model parameter")
+
      def check_dataset_defined_in_model(self, dataset_name: str):

          if dataset_name not in self.__model_def.inputs and dataset_name not in self.__model_def.outputs:
@@ -325,17 +737,18 @@ class TracContextValidator:

      def check_dataset_is_model_output(self, dataset_name: str):

-         if dataset_name not in self.__model_def.outputs:
+         if dataset_name not in self.__model_def.outputs and dataset_name not in self.__dynamic_outputs:
              self._report_error(f"Dataset {dataset_name} is not defined as a model output")

      def check_dataset_is_dynamic_output(self, dataset_name: str):

          model_output: _meta.ModelOutputSchema = self.__model_def.outputs.get(dataset_name)
+         dynamic_output = dataset_name in self.__dynamic_outputs

-         if model_output is None:
+         if model_output is None and not dynamic_output:
              self._report_error(f"Dataset {dataset_name} is not defined as a model output")

-         if not model_output.dynamic:
+         if model_output and not model_output.dynamic:
              self._report_error(f"Model output {dataset_name} is not a dynamic output")

      def check_dataset_available_in_context(self, item_name: str):
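
These validator changes make context-registered datasets first-class outputs: `check_dataset_is_model_output` and `check_dataset_is_dynamic_output` now accept anything listed in `dynamic_outputs`, whether it was declared on the model with the `dynamic` flag of `ModelOutputSchema` or added at runtime via `add_data_import`. A dynamic output still needs a schema before it can be written, along these lines (hypothetical names; the output is assumed to be declared dynamic):

    def run_model(self, ctx):

        # Reuse an input's schema for the dynamic output, then write to it
        schema = ctx.get_schema("customer_data")     # hypothetical input name
        ctx.put_schema("enriched_data", schema)
        ctx.put_table("enriched_data", enriched_df)  # enriched_df built earlier
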
@@ -343,6 +756,11 @@ class TracContextValidator:
          if item_name not in self.__local_ctx:
              self._report_error(f"Dataset {item_name} is not available in the current context")

+     def check_dataset_not_available_in_context(self, item_name: str):
+
+         if item_name in self.__local_ctx:
+             self._report_error(f"Dataset {item_name} already exists in the current context")
+
      def check_dataset_schema_defined(self, dataset_name: str, data_view: _data.DataView):

          schema = data_view.trac_schema if data_view is not None else None
@@ -415,12 +833,114 @@ class TracContextValidator:
              f"The object referenced by [{item_name}] in the current context has the wrong type" +
              f" (expected {expected_type_name}, got {actual_type_name})")

-     @staticmethod
-     def _type_name(type_: type):
+     def check_data_framework_args(self, framework: _eapi.DataFramework, framework_args: tp.Dict[str, tp.Any]):

-         module = type_.__module__
+         expected_args = _data.DataConverter.get_framework_args(framework)
+         unexpected_args = list(filter(lambda arg: arg not in expected_args, framework_args.keys()))
+
+         if any(unexpected_args):
+             unknown_args = ", ".join(unexpected_args)
+             self._report_error(f"Using [{framework}], some arguments were not recognized: [{unknown_args}]")
+
+         for arg_name, arg_type in expected_args.items():
+
+             arg_value = framework_args.get(arg_name)
+
+             if _val.check_type(arg_type, arg_value):
+                 continue
+
+             if arg_value is None:
+                 self._report_error(f"Using [{framework}], required argument [{arg_name}] is missing")
+
+             else:
+                 expected_type_name = self._type_name(arg_type)
+                 actual_type_name = self._type_name(type(arg_value))
+
+                 self._report_error(
+                     f"Using [{framework}], argument [{arg_name}] has the wrong type" +
+                     f" (expected {expected_type_name}, got {actual_type_name})")
+
+     def check_storage_valid_identifier(self, storage_key):
+
+         if storage_key is None:
+             self._report_error(f"Storage key is null")
+
+         if not self._VALID_IDENTIFIER.match(storage_key):
+             self._report_error(f"Storage key {storage_key} is not a valid identifier")
+
+     def check_storage_available(self, storage_map: tp.Dict, storage_key: str):
+
+         storage_instance = storage_map.get(storage_key)
+
+         if storage_instance is None:
+             self._report_error(f"Storage not available for storage key [{storage_key}]")
+
+     def check_storage_type(
+             self, storage_map: tp.Dict, storage_key: str,
+             storage_type: tp.Union[_eapi.TracFileStorage.__class__]):
+
+         storage_instance = storage_map.get(storage_key)
+
+         if not isinstance(storage_instance, storage_type):
+             if storage_type == _eapi.TracFileStorage:
+                 self._report_error(f"Storage key [{storage_key}] refers to data storage, not file storage")
+             else:
+                 self._report_error(f"Storage key [{storage_key}] refers to file storage, not data storage")

-         if module is None or module == str.__class__.__module__:
-             return type_.__qualname__

-         return module + '.' + type_.__name__
+
+ class TracStorageValidator(TracContextErrorReporter):
+
+     def __init__(self, log, checkout_directory, storage_key):
+         super().__init__(log, checkout_directory)
+         self.__storage_key = storage_key
+
+     def check_operation_available(self, public_func: tp.Callable, impl_func: tp.Callable):
+
+         if impl_func is None:
+             self._report_error(f"Operation [{public_func.__name__}] is not available for storage [{self.__storage_key}]")
+
+     def check_storage_path_is_valid(self, storage_path: str):
+
+         if _val.StorageValidator.storage_path_is_empty(storage_path):
+             self._report_error(f"Storage path is None or empty")
+
+         if _val.StorageValidator.storage_path_invalid(storage_path):
+             self._report_error(f"Storage path [{storage_path}] contains invalid characters")
+
+         if _val.StorageValidator.storage_path_not_relative(storage_path):
+             self._report_error(f"Storage path [{storage_path}] is not a relative path")
+
+         if _val.StorageValidator.storage_path_outside_root(storage_path):
+             self._report_error(f"Storage path [{storage_path}] is outside the storage root")
+
+     def check_storage_path_is_not_root(self, storage_path: str):
+
+         if _val.StorageValidator.storage_path_is_empty(storage_path):
+             self._report_error(f"Storage path [{storage_path}] is not allowed")
+
+     def check_table_name_is_valid(self, table_name: str):
+
+         if table_name is None:
+             self._report_error(f"Table name is null")
+
+         if not self._VALID_IDENTIFIER.match(table_name):
+             self._report_error(f"Table name {table_name} is not a valid identifier")
+
+     def check_table_name_not_reserved(self, table_name: str):
+
+         if self._RESERVED_IDENTIFIER.match(table_name):
+             self._report_error(f"Table name {table_name} is a reserved identifier")
+
+     def check_provided_dataset_type(self, dataset: tp.Any, expected_type: type):
+
+         if dataset is None:
+             self._report_error(f"Provided dataset is null")
+
+         if not isinstance(dataset, expected_type):
+
+             expected_type_name = self._type_name(expected_type)
+             actual_type_name = self._type_name(type(dataset))
+
+             self._report_error(
+                 f"Provided dataset is the wrong type" +
+                 f" (expected {expected_type_name}, got {actual_type_name})")
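
A consequence of `check_data_framework_args` is that framework-specific keyword arguments are validated eagerly: unknown names and type mismatches raise `ERuntimeValidation` via `_report_error` instead of being silently ignored. Roughly (hypothetical calls; `use_temporal_objects` is the pandas argument carried over from the old API, as seen in `get_pandas_table` above):

    # Recognized pandas-specific argument: accepted
    df = ctx.get_table("customer_data", eapi.PANDAS, use_temporal_objects=True)

    # Unknown argument: fails validation, reporting roughly
    # "Using [...], some arguments were not recognized: [chunk_size]"
    df = ctx.get_table("customer_data", eapi.PANDAS, chunk_size=1024)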