tracdap-runtime 0.7.0rc1__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (88)
  1. tracdap/rt/_impl/core/__init__.py +14 -0
  2. tracdap/rt/_impl/{config_parser.py → core/config_parser.py} +61 -36
  3. tracdap/rt/_impl/{data.py → core/data.py} +136 -32
  4. tracdap/rt/_impl/core/logging.py +195 -0
  5. tracdap/rt/_impl/{models.py → core/models.py} +15 -12
  6. tracdap/rt/_impl/{repos.py → core/repos.py} +12 -3
  7. tracdap/rt/_impl/{schemas.py → core/schemas.py} +5 -5
  8. tracdap/rt/_impl/{shim.py → core/shim.py} +5 -4
  9. tracdap/rt/_impl/{storage.py → core/storage.py} +21 -10
  10. tracdap/rt/_impl/core/struct.py +547 -0
  11. tracdap/rt/_impl/{type_system.py → core/type_system.py} +73 -33
  12. tracdap/rt/_impl/{util.py → core/util.py} +1 -111
  13. tracdap/rt/_impl/{validation.py → core/validation.py} +99 -31
  14. tracdap/rt/_impl/exec/__init__.py +14 -0
  15. tracdap/rt/{_exec → _impl/exec}/actors.py +12 -14
  16. tracdap/rt/{_exec → _impl/exec}/context.py +228 -82
  17. tracdap/rt/{_exec → _impl/exec}/dev_mode.py +176 -89
  18. tracdap/rt/{_exec → _impl/exec}/engine.py +230 -105
  19. tracdap/rt/{_exec → _impl/exec}/functions.py +191 -100
  20. tracdap/rt/{_exec → _impl/exec}/graph.py +24 -36
  21. tracdap/rt/{_exec → _impl/exec}/graph_builder.py +252 -115
  22. tracdap/rt/_impl/grpc/codec.py +1 -1
  23. tracdap/rt/{_exec → _impl/grpc}/server.py +7 -6
  24. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +3 -3
  25. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2_grpc.py +1 -1
  26. tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
  27. tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +40 -0
  28. tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.pyi +62 -0
  29. tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
  30. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +32 -20
  31. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +48 -2
  32. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +4 -2
  33. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.pyi +8 -0
  34. tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
  35. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +65 -63
  36. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +16 -2
  37. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +28 -26
  38. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -4
  39. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +4 -4
  40. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.pyi +6 -0
  41. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +9 -7
  42. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.pyi +12 -4
  43. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +18 -5
  44. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +42 -2
  45. tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
  46. tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.py → storage_pb2.py} +4 -4
  47. tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
  48. tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
  49. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +1 -1
  50. tracdap/rt/{_exec → _impl}/runtime.py +32 -18
  51. tracdap/rt/_impl/static_api.py +66 -38
  52. tracdap/rt/_plugins/format_csv.py +1 -1
  53. tracdap/rt/_plugins/repo_git.py +56 -11
  54. tracdap/rt/_plugins/storage_sql.py +13 -6
  55. tracdap/rt/_version.py +1 -1
  56. tracdap/rt/api/__init__.py +5 -24
  57. tracdap/rt/api/constants.py +57 -0
  58. tracdap/rt/api/experimental.py +32 -0
  59. tracdap/rt/api/hook.py +26 -7
  60. tracdap/rt/api/model_api.py +16 -0
  61. tracdap/rt/api/static_api.py +265 -127
  62. tracdap/rt/config/__init__.py +11 -11
  63. tracdap/rt/config/common.py +2 -26
  64. tracdap/rt/config/dynamic.py +28 -0
  65. tracdap/rt/config/platform.py +17 -31
  66. tracdap/rt/config/runtime.py +2 -0
  67. tracdap/rt/ext/embed.py +2 -2
  68. tracdap/rt/ext/plugins.py +3 -3
  69. tracdap/rt/launch/launch.py +12 -14
  70. tracdap/rt/metadata/__init__.py +31 -21
  71. tracdap/rt/metadata/config.py +95 -0
  72. tracdap/rt/metadata/data.py +40 -0
  73. tracdap/rt/metadata/file.py +10 -0
  74. tracdap/rt/metadata/job.py +16 -0
  75. tracdap/rt/metadata/model.py +12 -2
  76. tracdap/rt/metadata/object.py +9 -1
  77. tracdap/rt/metadata/object_id.py +6 -0
  78. tracdap/rt/metadata/resource.py +41 -1
  79. {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0.dist-info}/METADATA +33 -27
  80. tracdap_runtime-0.8.0.dist-info/RECORD +129 -0
  81. {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0.dist-info}/WHEEL +1 -1
  82. tracdap/rt/_exec/__init__.py +0 -0
  83. tracdap_runtime-0.7.0rc1.dist-info/RECORD +0 -121
  84. /tracdap/rt/_impl/{guard_rails.py → core/guard_rails.py} +0 -0
  85. /tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.pyi → storage_pb2.pyi} +0 -0
  86. /tracdap/rt/metadata/{stoarge.py → storage.py} +0 -0
  87. {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0.dist-info/licenses}/LICENSE +0 -0
  88. {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0.dist-info}/top_level.txt +0 -0
tracdap/rt/{_exec → _impl/exec}/functions.py

@@ -13,27 +13,29 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- from __future__ import annotations
-
+ import copy
  import datetime
  import abc
+ import io
+ import pathlib
  import random
  import dataclasses as dc  # noqa

  import tracdap.rt.api as _api
  import tracdap.rt.config as _config
  import tracdap.rt.exceptions as _ex
- import tracdap.rt._exec.context as _ctx
- import tracdap.rt._exec.graph_builder as _graph
- import tracdap.rt._impl.config_parser as _cfg_p  # noqa
- import tracdap.rt._impl.type_system as _types  # noqa
- import tracdap.rt._impl.data as _data  # noqa
- import tracdap.rt._impl.storage as _storage  # noqa
- import tracdap.rt._impl.models as _models  # noqa
- import tracdap.rt._impl.util as _util  # noqa
+ import tracdap.rt._impl.exec.context as _ctx
+ import tracdap.rt._impl.exec.graph_builder as _graph
+ import tracdap.rt._impl.core.type_system as _types
+ import tracdap.rt._impl.core.data as _data
+ import tracdap.rt._impl.core.logging as _logging
+ import tracdap.rt._impl.core.storage as _storage
+ import tracdap.rt._impl.core.struct as _struct
+ import tracdap.rt._impl.core.models as _models
+ import tracdap.rt._impl.core.util as _util

- from tracdap.rt._exec.graph import *
- from tracdap.rt._exec.graph import _T
+ from tracdap.rt._impl.exec.graph import *
+ from tracdap.rt._impl.exec.graph import _T


  class NodeContext:

@@ -227,11 +229,22 @@ class BuildJobResultFunc(NodeFunction[_config.JobResult]):
          job_result.jobId = self.node.job_id
          job_result.statusCode = meta.JobStatusCode.SUCCEEDED

+         if self.node.result_id is not None:
+
+             result_def = meta.ResultDefinition()
+             result_def.jobId = _util.selector_for(self.node.job_id)
+             result_def.statusCode = meta.JobStatusCode.SUCCEEDED
+
+             result_key = _util.object_key(self.node.result_id)
+             result_obj = meta.ObjectDefinition(objectType=meta.ObjectType.RESULT, result=result_def)
+
+             job_result.results[result_key] = result_obj
+
          # TODO: Handle individual failed results

-         for obj_id, node_id in self.node.outputs.objects.items():
+         for obj_key, node_id in self.node.outputs.objects.items():
              obj_def = _ctx_lookup(node_id, ctx)
-             job_result.results[obj_id] = obj_def
+             job_result.results[obj_key] = obj_def

          for bundle_id in self.node.outputs.bundles:
              bundle = _ctx_lookup(bundle_id, ctx)

@@ -241,9 +254,9 @@ class BuildJobResultFunc(NodeFunction[_config.JobResult]):

          runtime_outputs = _ctx_lookup(self.node.runtime_outputs, ctx)

-         for obj_id, node_id in runtime_outputs.objects.items():
+         for obj_key, node_id in runtime_outputs.objects.items():
              obj_def = _ctx_lookup(node_id, ctx)
-             job_result.results[obj_id] = obj_def
+             job_result.results[obj_key] = obj_def

          for bundle_id in runtime_outputs.bundles:
              bundle = _ctx_lookup(bundle_id, ctx)

@@ -252,37 +265,6 @@ class BuildJobResultFunc(NodeFunction[_config.JobResult]):
          return job_result


- class SaveJobResultFunc(NodeFunction[None]):
-
-     def __init__(self, node: SaveJobResultNode):
-         super().__init__()
-         self.node = node
-
-     def _execute(self, ctx: NodeContext) -> None:
-
-         job_result = _ctx_lookup(self.node.job_result_id, ctx)
-
-         if not self.node.result_spec.save_result:
-             return None
-
-         job_result_format = self.node.result_spec.result_format
-         job_result_str = _cfg_p.ConfigQuoter.quote(job_result, job_result_format)
-         job_result_bytes = bytes(job_result_str, "utf-8")
-
-         job_key = _util.object_key(job_result.jobId)
-         job_result_file = f"job_result_{job_key}.{self.node.result_spec.result_format}"
-         job_result_path = pathlib \
-             .Path(self.node.result_spec.result_dir) \
-             .joinpath(job_result_file)
-
-         _util.logger_for_object(self).info(f"Saving job result to [{job_result_path}]")
-
-         with open(job_result_path, "xb") as result_stream:
-             result_stream.write(job_result_bytes)
-
-         return None
-
-
  class DataViewFunc(NodeFunction[_data.DataView]):

      def __init__(self, node: DataViewNode):

@@ -296,8 +278,20 @@ class DataViewFunc(NodeFunction[_data.DataView]):

          # Map empty item -> empty view (for optional inputs not supplied)
          if root_item.is_empty():
-             return _data.DataView.create_empty()
+             return _data.DataView.create_empty(root_item.object_type)
+
+         # Handle file data views
+         if root_item.object_type == meta.ObjectType.FILE:
+             return _data.DataView.for_file_item(root_item)

+         # TODO: Generalize processing across DataView / DataItem types
+
+         if root_item.schema_type == meta.SchemaType.STRUCT:
+             view = _data.DataView.for_trac_schema(self.node.schema)
+             view.parts[root_part_key] = [root_item]
+             return view
+
+         # Everything else is a regular data view
          if self.node.schema is not None and len(self.node.schema.table.fields) > 0:
              trac_schema = self.node.schema
          else:

@@ -322,7 +316,11 @@ class DataItemFunc(NodeFunction[_data.DataItem]):

          # Map empty view -> empty item (for optional outputs not supplied)
          if data_view.is_empty():
-             return _data.DataItem.create_empty()
+             return _data.DataItem.create_empty(data_view.object_type)
+
+         # Handle file data views
+         if data_view.object_type == meta.ObjectType.FILE:
+             return data_view.file_item

          # TODO: Support selecting data item described by self.node

@@ -342,25 +340,24 @@ class DataResultFunc(NodeFunction[ObjectBundle]):

      def _execute(self, ctx: NodeContext) -> ObjectBundle:

-         data_item = _ctx_lookup(self.node.data_item_id, ctx)
+         data_spec = _ctx_lookup(self.node.data_save_id, ctx)

-         # Do not record output metadata for optional outputs that are empty
-         if data_item.is_empty():
-             return {}
+         result_bundle = dict()

-         data_spec = _ctx_lookup(self.node.data_spec_id, ctx)
+         # Do not record output metadata for optional outputs that are empty
+         if data_spec.is_empty():
+             return result_bundle

-         # TODO: Check result of save operation
-         # save_result = _ctx_lookup(self.node.data_save_id, ctx)
+         if self.node.data_key is not None:
+             result_bundle[self.node.data_key] = meta.ObjectDefinition(objectType=meta.ObjectType.DATA, data=data_spec.data_def)

-         data_result = meta.ObjectDefinition(objectType=meta.ObjectType.DATA, data=data_spec.data_def)
-         storage_result = meta.ObjectDefinition(objectType=meta.ObjectType.STORAGE, storage=data_spec.storage_def)
+         if self.node.file_key is not None:
+             result_bundle[self.node.file_key] = meta.ObjectDefinition(objectType=meta.ObjectType.FILE, file=data_spec.file_def)

-         bundle = {
-             self.node.data_key: data_result,
-             self.node.storage_key: storage_result}
+         if self.node.storage_key is not None:
+             result_bundle[self.node.storage_key] = meta.ObjectDefinition(objectType=meta.ObjectType.STORAGE, storage=data_spec.storage_def)

-         return bundle
+         return result_bundle


  class DynamicDataSpecFunc(NodeFunction[_data.DataSpec]):

@@ -443,11 +440,7 @@ class DynamicDataSpecFunc(NodeFunction[_data.DataSpec]):

          # Dynamic data def will always use an embedded schema (there is no ID for an external schema)

-         return _data.DataSpec(
-             data_item,
-             data_def,
-             storage_def,
-             schema_def=None)
+         return _data.DataSpec.create_data_spec(data_item, data_def, storage_def, schema_def=None)


  class _LoadSaveDataFunc(abc.ABC):

@@ -455,6 +448,16 @@
      def __init__(self, storage: _storage.StorageManager):
          self.storage = storage

+     @classmethod
+     def _choose_data_spec(cls, spec_id, spec, ctx: NodeContext):
+
+         if spec_id is not None:
+             return _ctx_lookup(spec_id, ctx)
+         elif spec is not None:
+             return spec
+         else:
+             raise _ex.EUnexpected()
+
      def _choose_copy(self, data_item: str, storage_def: meta.StorageDefinition) -> meta.StorageCopy:

          # Metadata should be checked for consistency before a job is accepted

@@ -491,54 +494,111 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):

      def _execute(self, ctx: NodeContext) -> _data.DataItem:

-         data_spec = _ctx_lookup(self.node.spec_id, ctx)
+         data_spec = self._choose_data_spec(self.node.spec_id, self.node.spec, ctx)
          data_copy = self._choose_copy(data_spec.data_item, data_spec.storage_def)
-         data_storage = self.storage.get_data_storage(data_copy.storageKey)
+
+         if data_spec.object_type == _api.ObjectType.FILE:
+             return self._load_file(data_copy)
+
+         elif data_spec.schema_type == _api.SchemaType.TABLE:
+             return self._load_table(data_spec, data_copy)
+
+         elif data_spec.schema_type == _api.SchemaType.STRUCT:
+             return self._load_struct(data_copy)
+
+         # TODO: Handle dynamic inputs, they should work for any schema type
+         elif data_spec.schema_type == _api.SchemaType.SCHEMA_TYPE_NOT_SET:
+             return self._load_table(data_spec, data_copy)
+
+         else:
+             raise _ex.EUnexpected()
+
+     def _load_file(self, data_copy):
+
+         storage = self.storage.get_file_storage(data_copy.storageKey)
+         content = storage.read_bytes(data_copy.storagePath)
+
+         return _data.DataItem.for_file_content(content)
+
+     def _load_table(self, data_spec, data_copy):

          trac_schema = data_spec.schema_def if data_spec.schema_def else data_spec.data_def.schema
          arrow_schema = _data.DataMapping.trac_to_arrow_schema(trac_schema) if trac_schema else None

-         # Decode options (metadata values) from the storage definition
-         options = dict()
-         for opt_key, opt_value in data_spec.storage_def.storageOptions.items():
-             options[opt_key] = _types.MetadataCodec.decode_value(opt_value)
+         storage_options = dict(
+             (opt_key, _types.MetadataCodec.decode_value(opt_value))
+             for opt_key, opt_value in data_spec.storage_def.storageOptions.items())
+
+         storage = self.storage.get_data_storage(data_copy.storageKey)
+
+         table = storage.read_table(
+             data_copy.storagePath, data_copy.storageFormat, arrow_schema,
+             storage_options=storage_options)
+
+         return _data.DataItem.for_table(table, table.schema, trac_schema)

-         table = data_storage.read_table(
-             data_copy.storagePath,
-             data_copy.storageFormat,
-             arrow_schema,
-             storage_options=options)
+     def _load_struct(self, data_copy):

-         return _data.DataItem(table.schema, table)
+         storage = self.storage.get_file_storage(data_copy.storageKey)

+         with storage.read_byte_stream(data_copy.storagePath) as stream:
+             with io.TextIOWrapper(stream, "utf-8") as text_stream:
+                 struct = _struct.StructProcessor.load_struct(text_stream, data_copy.storageFormat)

- class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[None]):
+         return _data.DataItem.for_struct(struct)
+
+
+ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):

      def __init__(self, node: SaveDataNode, storage: _storage.StorageManager):
          super().__init__(storage)
          self.node = node

-     def _execute(self, ctx: NodeContext):
+     def _execute(self, ctx: NodeContext) -> _data.DataSpec:

          # Item to be saved should exist in the current context
          data_item = _ctx_lookup(self.node.data_item_id, ctx)

+         # Metadata already exists as data_spec but may not contain schema, row count, file size etc.
+         data_spec = self._choose_data_spec(self.node.spec_id, self.node.spec, ctx)
+         data_copy = self._choose_copy(data_spec.data_item, data_spec.storage_def)
+
          # Do not save empty outputs (optional outputs that were not produced)
          if data_item.is_empty():
-             return
+             return _data.DataSpec.create_empty_spec(data_item.object_type, data_item.schema_type)

-         # This function assumes that metadata has already been generated as the data_spec
-         # i.e. it is already known which incarnation / copy of the data will be created
+         if data_item.object_type == _api.ObjectType.FILE:
+             return self._save_file(data_item, data_spec, data_copy)

-         data_spec = _ctx_lookup(self.node.spec_id, ctx)
-         data_copy = self._choose_copy(data_spec.data_item, data_spec.storage_def)
-         data_storage = self.storage.get_data_storage(data_copy.storageKey)
+         elif data_item.schema_type == _api.SchemaType.TABLE:
+             return self._save_table(data_item, data_spec, data_copy)
+
+         elif data_item.schema_type == _api.SchemaType.STRUCT:
+             return self._save_struct(data_item, data_spec, data_copy)
+
+         else:
+             raise _ex.EUnexpected()
+
+     def _save_file(self, data_item, data_spec, data_copy):
+
+         if data_item.content is None:
+             raise _ex.EUnexpected()
+
+         storage = self.storage.get_file_storage(data_copy.storageKey)
+         storage.write_bytes(data_copy.storagePath, data_item.content)
+
+         data_spec = copy.deepcopy(data_spec)
+         data_spec.file_def.size = len(data_item.content)
+
+         return data_spec
+
+     def _save_table(self, data_item, data_spec, data_copy):

          # Current implementation will always put an Arrow table in the data item
          # Empty tables are allowed, so explicitly check if table is None
          # Testing "if not data_item.table" will fail for empty tables

-         if data_item.table is None:
+         if data_item.content is None:
              raise _ex.EUnexpected()

          # Decode options (metadata values) from the storage definition

@@ -546,11 +606,42 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[None]):
          for opt_key, opt_value in data_spec.storage_def.storageOptions.items():
              options[opt_key] = _types.MetadataCodec.decode_value(opt_value)

-         data_storage.write_table(
+         storage = self.storage.get_data_storage(data_copy.storageKey)
+         storage.write_table(
              data_copy.storagePath, data_copy.storageFormat,
-             data_item.table,
+             data_item.content,
              storage_options=options, overwrite=False)

+         data_spec = copy.deepcopy(data_spec)
+         # TODO: Save row count in metadata
+
+         if data_spec.data_def.schema is None and data_spec.data_def.schemaId is None:
+             data_spec.data_def.schema = _data.DataMapping.arrow_to_trac_schema(data_item.table.schema)
+
+         return data_spec
+
+     def _save_struct(self, data_item, data_spec, data_copy):
+
+         if data_item.content is None:
+             raise _ex.EUnexpected()
+
+         struct_data = data_item.content
+         storage_format = data_copy.storageFormat
+
+         storage = self.storage.get_file_storage(data_copy.storageKey)
+
+         # Using the text wrapper closes the stream early, which is inefficient in the data layer
+         # Supporting text IO directly from the storage API would allow working with text streams more naturally
+         with storage.write_byte_stream(data_copy.storagePath) as stream:
+             with io.TextIOWrapper(stream, "utf-8") as text_stream:
+                 _struct.StructProcessor.save_struct(struct_data, text_stream, storage_format)
+
+         data_spec = copy.deepcopy(data_spec)
+
+         if data_spec.data_def.schema is None and data_spec.data_def.schemaId is None:
+             data_spec.data_def.schema = data_item.trac_schema
+
+         return data_spec

  def _model_def_for_import(import_details: meta.ImportModelJob):

@@ -571,8 +662,6 @@ class ImportModelFunc(NodeFunction[meta.ObjectDefinition]):
          self.node = node
          self._models = models

-         self._log = _util.logger_for_object(self)
-
      def _execute(self, ctx: NodeContext) -> meta.ObjectDefinition:

          model_stub = _model_def_for_import(self.node.import_details)

@@ -589,13 +678,15 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
              self, node: RunModelNode,
              model_class: _api.TracModel.__class__,
              checkout_directory: pathlib.Path,
-             storage_manager: _storage.StorageManager):
+             storage_manager: _storage.StorageManager,
+             log_provider: _logging.LogProvider):

          super().__init__()
          self.node = node
          self.model_class = model_class
          self.checkout_directory = checkout_directory
          self.storage_manager = storage_manager
+         self.log_provider = log_provider

      def _execute(self, ctx: NodeContext) -> Bundle[_data.DataView]:

@@ -622,7 +713,7 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
          for storage_key in self.node.storage_access:
              if self.storage_manager.has_file_storage(storage_key, external=True):
                  storage_impl = self.storage_manager.get_file_storage(storage_key, external=True)
-                 storage = _ctx.TracFileStorageImpl(storage_key, storage_impl, write_access, self.checkout_directory)
+                 storage = _ctx.TracFileStorageImpl(storage_key, storage_impl, write_access, self.checkout_directory, self.log_provider)
                  storage_map[storage_key] = storage
              elif self.storage_manager.has_data_storage(storage_key, external=True):
                  storage_impl = self.storage_manager.get_data_storage(storage_key, external=True)

@@ -630,7 +721,7 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
                  if not isinstance(storage_impl, _storage.IDataStorageBase):
                      raise _ex.EStorageConfig(f"External storage for [{storage_key}] is using the legacy storage framework]")
                  converter = _data.DataConverter.noop()
-                 storage = _ctx.TracDataStorageImpl(storage_key, storage_impl, converter, write_access, self.checkout_directory)
+                 storage = _ctx.TracDataStorageImpl(storage_key, storage_impl, converter, write_access, self.checkout_directory, self.log_provider)
                  storage_map[storage_key] = storage
              else:
                  raise _ex.EStorageConfig(f"External storage is not available: [{storage_key}]")

@@ -642,12 +733,12 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
              trac_ctx = _ctx.TracDataContextImpl(
                  self.node.model_def, self.model_class,
                  local_ctx, dynamic_outputs, storage_map,
-                 self.checkout_directory)
+                 self.checkout_directory, self.log_provider)
          else:
              trac_ctx = _ctx.TracContextImpl(
                  self.node.model_def, self.model_class,
                  local_ctx, dynamic_outputs,
-                 self.checkout_directory)
+                 self.checkout_directory, self.log_provider)

          try:
              model = self.model_class()

@@ -750,9 +841,10 @@ class FunctionResolver:

      __ResolveFunc = tp.Callable[['FunctionResolver', Node[_T]], NodeFunction[_T]]

-     def __init__(self, models: _models.ModelLoader, storage: _storage.StorageManager):
+     def __init__(self, models: _models.ModelLoader, storage: _storage.StorageManager, log_provider: _logging.LogProvider):
          self._models = models
          self._storage = storage
+         self._log_provider = log_provider

      def resolve_node(self, node: Node[_T]) -> NodeFunction[_T]:

@@ -788,7 +880,7 @@ class FunctionResolver:
          checkout_directory = self._models.model_load_checkout_directory(node.model_scope, node.model_def)
          storage_manager = self._storage if node.storage_access else None

-         return RunModelFunc(node, model_class, checkout_directory, storage_manager)
+         return RunModelFunc(node, model_class, checkout_directory, storage_manager, self._log_provider)

      __basic_node_mapping: tp.Dict[Node.__class__, NodeFunction.__class__] = {

@@ -799,7 +891,6 @@ class FunctionResolver:
          DataViewNode: DataViewFunc,
          DataItemNode: DataItemFunc,
          BuildJobResultNode: BuildJobResultFunc,
-         SaveJobResultNode: SaveJobResultFunc,
          DataResultNode: DataResultFunc,
          StaticValueNode: StaticValueFunc,
          RuntimeOutputsNode: RuntimeOutputsFunc,
tracdap/rt/{_exec → _impl/exec}/graph.py

@@ -13,11 +13,10 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- import pathlib
  import typing as tp
  import dataclasses as dc

- import tracdap.rt._impl.data as _data  # noqa
+ import tracdap.rt._impl.core.data as _data
  import tracdap.rt.metadata as meta
  import tracdap.rt.config as cfg

@@ -182,15 +181,6 @@ class JobOutputs:
      bundles: tp.List[NodeId[ObjectBundle]] = dc.field(default_factory=list)


- # TODO: Where does this go?
- @dc.dataclass(frozen=True)
- class JobResultSpec:
-
-     save_result: bool = False
-     result_dir: tp.Union[str, pathlib.Path] = None
-     result_format: str = None
-
-
  # ----------------------------------------------------------------------------------------------------------------------
  # NODE DEFINITIONS
  # ----------------------------------------------------------------------------------------------------------------------

@@ -309,20 +299,18 @@ class DataItemNode(MappingNode[_data.DataItem]):
  @_node_type
  class DataResultNode(Node[ObjectBundle]):

+     # TODO: Remove this node type
+     # Either produce metadata in SaveDataNode, or handle DataSpec outputs in result processing nodes
+
      output_name: str
-     data_item_id: NodeId[_data.DataItem]
-     data_spec_id: NodeId[_data.DataSpec]
-     data_save_id: NodeId[type(None)]
+     data_save_id: NodeId[_data.DataSpec]

-     data_key: str
-     storage_key: str
+     data_key: str = None
+     file_key: str = None
+     storage_key: str = None

      def _node_dependencies(self) -> tp.Dict[NodeId, DependencyType]:
-
-         return {
-             self.data_item_id: DependencyType.HARD,
-             self.data_spec_id: DependencyType.HARD,
-             self.data_save_id: DependencyType.HARD}
+         return {self.data_save_id: DependencyType.HARD}


@@ -333,24 +321,33 @@ class LoadDataNode(Node[_data.DataItem]):
      The latest incarnation of the item will be loaded from any available copy
      """

-     spec_id: NodeId[_data.DataSpec]
+     spec_id: tp.Optional[NodeId[_data.DataSpec]] = None
+     spec: tp.Optional[_data.DataSpec] = None

      def _node_dependencies(self) -> tp.Dict[NodeId, DependencyType]:
-         return {self.spec_id: DependencyType.HARD}
+         deps = dict()
+         if self.spec_id is not None:
+             deps[self.spec_id] = DependencyType.HARD
+         return deps


  @_node_type
- class SaveDataNode(Node[None]):
+ class SaveDataNode(Node[_data.DataSpec]):

      """
      Save an individual data item to storage
      """

-     spec_id: NodeId[_data.DataSpec]
      data_item_id: NodeId[_data.DataItem]

+     spec_id: tp.Optional[NodeId[_data.DataSpec]] = None
+     spec: tp.Optional[_data.DataSpec] = None
+
      def _node_dependencies(self) -> tp.Dict[NodeId, DependencyType]:
-         return {self.spec_id: DependencyType.HARD, self.data_item_id: DependencyType.HARD}
+         deps = {self.data_item_id: DependencyType.HARD}
+         if self.spec_id is not None:
+             deps[self.spec_id] = DependencyType.HARD
+         return deps


  @_node_type

@@ -395,6 +392,7 @@ class RuntimeOutputsNode(Node[JobOutputs]):
  @_node_type
  class BuildJobResultNode(Node[cfg.JobResult]):

+     result_id: meta.TagHeader
      job_id: meta.TagHeader

      outputs: JobOutputs

@@ -407,16 +405,6 @@ class BuildJobResultNode(Node[cfg.JobResult]):
          return {node_id: DependencyType.HARD for node_id in dep_ids}


- @_node_type
- class SaveJobResultNode(Node[None]):
-
-     job_result_id: NodeId[cfg.JobResult]
-     result_spec: JobResultSpec
-
-     def _node_dependencies(self) -> tp.Dict[NodeId, DependencyType]:
-         return {self.job_result_id: DependencyType.HARD}
-
-
  @_node_type
  class ChildJobNode(Node[cfg.JobResult]):