tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. tracdap/rt/_impl/core/config_parser.py +29 -3
  2. tracdap/rt/_impl/core/data.py +627 -40
  3. tracdap/rt/_impl/core/repos.py +17 -8
  4. tracdap/rt/_impl/core/storage.py +25 -13
  5. tracdap/rt/_impl/core/struct.py +254 -60
  6. tracdap/rt/_impl/core/util.py +125 -11
  7. tracdap/rt/_impl/exec/context.py +35 -8
  8. tracdap/rt/_impl/exec/dev_mode.py +169 -127
  9. tracdap/rt/_impl/exec/engine.py +203 -140
  10. tracdap/rt/_impl/exec/functions.py +228 -263
  11. tracdap/rt/_impl/exec/graph.py +141 -126
  12. tracdap/rt/_impl/exec/graph_builder.py +428 -449
  13. tracdap/rt/_impl/grpc/codec.py +8 -13
  14. tracdap/rt/_impl/grpc/server.py +7 -7
  15. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
  16. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
  17. tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
  18. tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
  19. tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
  20. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +37 -35
  21. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
  22. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
  23. tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
  24. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
  25. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
  26. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
  27. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
  28. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
  29. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
  30. tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
  31. tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
  32. tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
  33. tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
  34. tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
  35. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +23 -19
  36. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
  37. tracdap/rt/_impl/runtime.py +3 -9
  38. tracdap/rt/_impl/static_api.py +5 -6
  39. tracdap/rt/_plugins/format_csv.py +2 -2
  40. tracdap/rt/_plugins/repo_git.py +56 -11
  41. tracdap/rt/_plugins/storage_aws.py +165 -150
  42. tracdap/rt/_plugins/storage_azure.py +17 -11
  43. tracdap/rt/_plugins/storage_gcp.py +35 -18
  44. tracdap/rt/_version.py +1 -1
  45. tracdap/rt/api/model_api.py +45 -0
  46. tracdap/rt/config/__init__.py +7 -9
  47. tracdap/rt/config/common.py +3 -14
  48. tracdap/rt/config/job.py +17 -3
  49. tracdap/rt/config/platform.py +9 -32
  50. tracdap/rt/config/result.py +8 -4
  51. tracdap/rt/config/runtime.py +5 -10
  52. tracdap/rt/config/tenant.py +28 -0
  53. tracdap/rt/launch/cli.py +0 -8
  54. tracdap/rt/launch/launch.py +1 -3
  55. tracdap/rt/metadata/__init__.py +35 -35
  56. tracdap/rt/metadata/data.py +19 -31
  57. tracdap/rt/metadata/job.py +3 -1
  58. tracdap/rt/metadata/storage.py +9 -0
  59. tracdap/rt/metadata/type.py +9 -5
  60. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +5 -3
  61. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +64 -63
  62. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
  63. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
  64. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
tracdap/rt/_impl/exec/functions.py

@@ -13,16 +13,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import copy
-import datetime
 import abc
+import copy
 import io
 import pathlib
-import random
-import dataclasses as dc  # noqa
+import typing as tp

 import tracdap.rt.api as _api
-import tracdap.rt.config as _config
+import tracdap.rt.metadata as _meta
+import tracdap.rt.config as _cfg
 import tracdap.rt.exceptions as _ex
 import tracdap.rt._impl.exec.context as _ctx
 import tracdap.rt._impl.exec.graph_builder as _graph
@@ -66,7 +65,7 @@ class NodeContext:
 class NodeCallback:

     @abc.abstractmethod
-    def send_graph_updates(self, new_nodes: tp.Dict[NodeId, Node], new_deps: tp.Dict[NodeId, tp.List[Dependency]]):
+    def send_graph_update(self, update: _graph.GraphUpdate):
         pass

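This change collapses the callback's two parallel dictionaries (new nodes, new dependencies) into a single `_graph.GraphUpdate` argument, so an update is delivered as one atomic value. A minimal sketch of that shape, with assumed field names (`nodes`, `deps`), since the real `GraphUpdate` definition in `tracdap/rt/_impl/exec/graph.py` is not shown in this diff:

    # Hypothetical sketch -- field names are assumptions, not the real GraphUpdate
    import dataclasses as dc
    import typing as tp

    @dc.dataclass(frozen=True)
    class GraphUpdateSketch:
        nodes: tp.Dict[str, object] = dc.field(default_factory=dict)       # new nodes, keyed by node ID
        deps: tp.Dict[str, tp.List[str]] = dc.field(default_factory=dict)  # new edges, keyed by node ID

    def send_graph_update(update: GraphUpdateSketch):
        # Nodes and their dependencies arrive together, so a consumer
        # can never observe one without the other
        for node_id in update.nodes:
            print(node_id, "->", update.deps.get(node_id, []))

    update = GraphUpdateSketch(nodes={"n1": object()}, deps={"n1": []})
    send_graph_update(update)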
@@ -120,6 +119,9 @@ class NodeFunction(tp.Generic[_T]):
 # ----------------------------------------------------------------------------------------------------------------------


+# STATIC VALUES
+# -------------
+
 class NoopFunc(NodeFunction[None]):

     def __init__(self, node: NoopNode):
@@ -140,6 +142,9 @@ class StaticValueFunc(NodeFunction[_T]):
         return self.node.value


+# MAPPING OPERATIONS
+# ------------------
+
 class IdentityFunc(NodeFunction[_T]):

     def __init__(self, node: IdentityNode[_T]):
@@ -150,6 +155,18 @@ class IdentityFunc(NodeFunction[_T]):
         return _ctx_lookup(self.node.src_id, ctx)


+class KeyedItemFunc(NodeFunction[_T]):
+
+    def __init__(self, node: KeyedItemNode[_T]):
+        super().__init__()
+        self.node = node
+
+    def _execute(self, ctx: NodeContext) -> _T:
+        src_node_result = _ctx_lookup(self.node.src_id, ctx)
+        src_item = src_node_result.get(self.node.src_item)
+        return src_item
+
+
 class _ContextPushPopFunc(NodeFunction[Bundle[tp.Any]], abc.ABC):

     # This approach to context push / pop assumes all the nodes to be mapped are already available
@@ -195,74 +212,27 @@ class ContextPopFunc(_ContextPushPopFunc):
         super(ContextPopFunc, self).__init__(node, self._POP)


-class KeyedItemFunc(NodeFunction[_T]):
+# DATA HANDLING
+# -------------

-    def __init__(self, node: KeyedItemNode[_T]):
-        super().__init__()
-        self.node = node
-
-    def _execute(self, ctx: NodeContext) -> _T:
-        src_node_result = _ctx_lookup(self.node.src_id, ctx)
-        src_item = src_node_result.get(self.node.src_item)
-        return src_item
-
-
-class RuntimeOutputsFunc(NodeFunction[JobOutputs]):
-
-    def __init__(self, node: RuntimeOutputsNode):
-        super().__init__()
-        self.node = node
-
-    def _execute(self, ctx: NodeContext) -> JobOutputs:
-        return self.node.outputs
-
-
-class BuildJobResultFunc(NodeFunction[_config.JobResult]):
+class DataSpecFunc(NodeFunction[_data.DataSpec]):

-    def __init__(self, node: BuildJobResultNode):
+    def __init__(self, node: DataSpecNode):
         super().__init__()
         self.node = node

-    def _execute(self, ctx: NodeContext) -> _config.JobResult:
-
-        job_result = _config.JobResult()
-        job_result.jobId = self.node.job_id
-        job_result.statusCode = meta.JobStatusCode.SUCCEEDED
-
-        if self.node.result_id is not None:
-
-            result_def = meta.ResultDefinition()
-            result_def.jobId = _util.selector_for(self.node.job_id)
-            result_def.statusCode = meta.JobStatusCode.SUCCEEDED
-
-            result_key = _util.object_key(self.node.result_id)
-            result_obj = meta.ObjectDefinition(objectType=meta.ObjectType.RESULT, result=result_def)
-
-            job_result.results[result_key] = result_obj
-
-        # TODO: Handle individual failed results
-
-        for obj_key, node_id in self.node.outputs.objects.items():
-            obj_def = _ctx_lookup(node_id, ctx)
-            job_result.results[obj_key] = obj_def
-
-        for bundle_id in self.node.outputs.bundles:
-            bundle = _ctx_lookup(bundle_id, ctx)
-            job_result.results.update(bundle.items())
-
-        if self.node.runtime_outputs is not None:
-
-            runtime_outputs = _ctx_lookup(self.node.runtime_outputs, ctx)
-
-            for obj_key, node_id in runtime_outputs.objects.items():
-                obj_def = _ctx_lookup(node_id, ctx)
-                job_result.results[obj_key] = obj_def
+    def _execute(self, ctx: NodeContext) -> _data.DataSpec:

-            for bundle_id in runtime_outputs.bundles:
-                bundle = _ctx_lookup(bundle_id, ctx)
-                job_result.results.update(bundle.items())
+        # Get the schema from runtime data
+        data_view = _ctx_lookup(self.node.data_view_id, ctx)
+        trac_schema = data_view.trac_schema

-        return job_result
+        # Common logic for building a data spec is part of the data module
+        return _data.build_data_spec(
+            self.node.data_obj_id, self.node.storage_obj_id,
+            self.node.context_key, trac_schema,
+            self.node.sys_config,
+            self.node.prior_data_spec)


 class DataViewFunc(NodeFunction[_data.DataView]):
@@ -281,12 +251,12 @@ class DataViewFunc(NodeFunction[_data.DataView]):
             return _data.DataView.create_empty(root_item.object_type)

         # Handle file data views
-        if root_item.object_type == meta.ObjectType.FILE:
+        if root_item.object_type == _meta.ObjectType.FILE:
             return _data.DataView.for_file_item(root_item)

         # TODO: Generalize processing across DataView / DataItem types

-        if root_item.schema_type == meta.SchemaType.STRUCT:
+        if root_item.schema_type == _meta.SchemaType.STRUCT_SCHEMA:
             view = _data.DataView.for_trac_schema(self.node.schema)
             view.parts[root_part_key] = [root_item]
             return view
@@ -294,12 +264,14 @@ class DataViewFunc(NodeFunction[_data.DataView]):
         # Everything else is a regular data view
         if self.node.schema is not None and len(self.node.schema.table.fields) > 0:
             trac_schema = self.node.schema
+            data_view = _data.DataView.for_trac_schema(trac_schema)
         else:
             arrow_schema = root_item.schema
-            trac_schema = _data.DataMapping.arrow_to_trac_schema(arrow_schema)
+            data_view = _data.DataView.for_arrow_schema(arrow_schema)

-        data_view = _data.DataView.for_trac_schema(trac_schema)
-        data_view = _data.DataMapping.add_item_to_view(data_view, root_part_key, root_item)
+        data_view = _data.DataMapping \
+            .add_item_to_view(data_view, root_part_key, root_item) \
+            .with_metadata(root_item.metadata)

         return data_view

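In the branch above, a view for data with no explicit TRAC schema is now built directly from the Arrow schema (`DataView.for_arrow_schema`) instead of converting it first with `DataMapping.arrow_to_trac_schema`. A small illustration of the Arrow side only, using pyarrow, which the runtime already depends on; the view construction call itself is internal and not reproduced here:

    # Illustration only: an Arrow schema like the one root_item.schema would carry.
    # The new code path keeps this schema on the view as-is, rather than
    # round-tripping it through a TRAC schema first.
    import pyarrow as pa

    arrow_schema = pa.schema([
        pa.field("customer_id", pa.string()),
        pa.field("balance", pa.float64()),
    ])

    print(arrow_schema.names)  # ['customer_id', 'balance']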
@@ -319,7 +291,7 @@ class DataItemFunc(NodeFunction[_data.DataItem]):
             return _data.DataItem.create_empty(data_view.object_type)

         # Handle file data views
-        if data_view.object_type == meta.ObjectType.FILE:
+        if data_view.object_type == _meta.ObjectType.FILE:
             return data_view.file_item

         # TODO: Support selecting data item described by self.node
@@ -332,117 +304,6 @@ class DataItemFunc(NodeFunction[_data.DataItem]):
         return delta


-class DataResultFunc(NodeFunction[ObjectBundle]):
-
-    def __init__(self, node: DataResultNode):
-        super().__init__()
-        self.node = node
-
-    def _execute(self, ctx: NodeContext) -> ObjectBundle:
-
-        data_spec = _ctx_lookup(self.node.data_save_id, ctx)
-
-        result_bundle = dict()
-
-        # Do not record output metadata for optional outputs that are empty
-        if data_spec.is_empty():
-            return result_bundle
-
-        if self.node.data_key is not None:
-            result_bundle[self.node.data_key] = meta.ObjectDefinition(objectType=meta.ObjectType.DATA, data=data_spec.data_def)
-
-        if self.node.file_key is not None:
-            result_bundle[self.node.file_key] = meta.ObjectDefinition(objectType=meta.ObjectType.FILE, file=data_spec.file_def)
-
-        if self.node.storage_key is not None:
-            result_bundle[self.node.storage_key] = meta.ObjectDefinition(objectType=meta.ObjectType.STORAGE, storage=data_spec.storage_def)
-
-        return result_bundle
-
-
-class DynamicDataSpecFunc(NodeFunction[_data.DataSpec]):
-
-    DATA_ITEM_TEMPLATE = "data/{}/{}/{}/snap-{:d}/delta-{:d}"
-    STORAGE_PATH_TEMPLATE = "data/{}/{}/{}/snap-{:d}/delta-{:d}-x{:0>6x}"
-
-    RANDOM = random.Random()
-    RANDOM.seed()
-
-    def __init__(self, node: DynamicDataSpecNode, storage: _storage.StorageManager):
-        super().__init__()
-        self.node = node
-        self.storage = storage
-
-    def _execute(self, ctx: NodeContext) -> _data.DataSpec:
-
-        # When data def for an output was not supplied in the job, this function creates a dynamic data spec
-
-        if self.node.prior_data_spec is not None:
-            raise _ex.ETracInternal("Data updates not supported yet")
-
-        data_view = _ctx_lookup(self.node.data_view_id, ctx)
-
-        data_id = self.node.data_obj_id
-        storage_id = self.node.storage_obj_id
-
-        # TODO: pass the object timestamp in from somewhere
-
-        # Note that datetime.utcnow() creates a datetime with no zone
-        # datetime.now(utc) creates a datetime with an explicit UTC zone
-        # The latter is more precise, also missing zones are rejected by validation
-        # (lenient validation might infer the zone, this should be limited to front-facing APIs)
-
-        object_timestamp = datetime.datetime.now(datetime.timezone.utc)
-
-        part_key = meta.PartKey("part-root", meta.PartType.PART_ROOT)
-        snap_index = 0
-        delta_index = 0
-
-        data_type = data_view.trac_schema.schemaType.name.lower()
-
-        data_item = self.DATA_ITEM_TEMPLATE.format(
-            data_type, data_id.objectId,
-            part_key.opaqueKey, snap_index, delta_index)
-
-        delta = meta.DataDefinition.Delta(delta_index, data_item)
-        snap = meta.DataDefinition.Snap(snap_index, [delta])
-        part = meta.DataDefinition.Part(part_key, snap)
-
-        data_def = meta.DataDefinition()
-        data_def.storageId = _util.selector_for_latest(storage_id)
-        data_def.schema = data_view.trac_schema
-        data_def.parts[part_key.opaqueKey] = part
-
-        storage_key = self.storage.default_storage_key()
-        storage_format = self.storage.default_storage_format()
-        storage_suffix_bytes = random.randint(0, 1 << 24)
-
-        storage_path = self.DATA_ITEM_TEMPLATE.format(
-            data_type, data_id.objectId,
-            part_key.opaqueKey, snap_index, delta_index,
-            storage_suffix_bytes)
-
-        storage_copy = meta.StorageCopy(
-            storage_key, storage_path, storage_format,
-            copyStatus=meta.CopyStatus.COPY_AVAILABLE,
-            copyTimestamp=meta.DatetimeValue(object_timestamp.isoformat()))
-
-        storage_incarnation = meta.StorageIncarnation(
-            [storage_copy],
-            incarnationIndex=0,
-            incarnationTimestamp=meta.DatetimeValue(object_timestamp.isoformat()),
-            incarnationStatus=meta.IncarnationStatus.INCARNATION_AVAILABLE)
-
-        storage_item = meta.StorageItem([storage_incarnation])
-
-        storage_def = meta.StorageDefinition()
-        storage_def.dataItems[data_item] = storage_item
-
-        # Dynamic data def will always use an embedded schema (this is no ID for an external schema)
-
-        return _data.DataSpec.create_data_spec(data_item, data_def, storage_def, schema_def=None)
-
-
 class _LoadSaveDataFunc(abc.ABC):

     def __init__(self, storage: _storage.StorageManager):
@@ -458,7 +319,7 @@ class _LoadSaveDataFunc(abc.ABC):
         else:
             raise _ex.EUnexpected()

-    def _choose_copy(self, data_item: str, storage_def: meta.StorageDefinition) -> meta.StorageCopy:
+    def _choose_copy(self, data_item: str, storage_def: _meta.StorageDefinition) -> _meta.StorageCopy:

         # Metadata should be checked for consistency before a job is accepted
         # An error here indicates a validation gap
@@ -469,15 +330,15 @@
             raise _ex.EValidationGap()

         incarnation = next(filter(
-            lambda i: i.incarnationStatus == meta.IncarnationStatus.INCARNATION_AVAILABLE,
+            lambda i: i.incarnationStatus == _meta.IncarnationStatus.INCARNATION_AVAILABLE,
             reversed(storage_info.incarnations)), None)

         if incarnation is None:
             raise _ex.EValidationGap()

         copy_ = next(filter(
-            lambda c: c.copyStatus == meta.CopyStatus.COPY_AVAILABLE
-            and self.storage.has_data_storage(c.storageKey),
+            lambda c: c.copyStatus == _meta.CopyStatus.COPY_AVAILABLE
+                      and self.storage.has_data_storage(c.storageKey),
             incarnation.copies), None)

         if copy_ is None:
@@ -495,15 +356,15 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):
     def _execute(self, ctx: NodeContext) -> _data.DataItem:

         data_spec = self._choose_data_spec(self.node.spec_id, self.node.spec, ctx)
-        data_copy = self._choose_copy(data_spec.data_item, data_spec.storage_def)
+        data_copy = self._choose_copy(data_spec.data_item, data_spec.storage)

         if data_spec.object_type == _api.ObjectType.FILE:
             return self._load_file(data_copy)

-        elif data_spec.schema_type == _api.SchemaType.TABLE:
+        elif data_spec.schema_type == _api.SchemaType.TABLE_SCHEMA:
             return self._load_table(data_spec, data_copy)

-        elif data_spec.schema_type == _api.SchemaType.STRUCT:
+        elif data_spec.schema_type == _api.SchemaType.STRUCT_SCHEMA:
             return self._load_struct(data_copy)

         # TODO: Handle dynamic inputs, they should work for any schema type
@@ -522,12 +383,12 @@

     def _load_table(self, data_spec, data_copy):

-        trac_schema = data_spec.schema_def if data_spec.schema_def else data_spec.data_def.schema
+        trac_schema = data_spec.schema if data_spec.schema else data_spec.definition.schema
         arrow_schema = _data.DataMapping.trac_to_arrow_schema(trac_schema) if trac_schema else None

         storage_options = dict(
             (opt_key, _types.MetadataCodec.decode_value(opt_value))
-            for opt_key, opt_value in data_spec.storage_def.storageOptions.items())
+            for opt_key, opt_value in data_spec.storage.storageOptions.items())

         storage = self.storage.get_data_storage(data_copy.storageKey)

@@ -535,7 +396,9 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):
             data_copy.storagePath, data_copy.storageFormat, arrow_schema,
             storage_options=storage_options)

-        return _data.DataItem.for_table(table, table.schema, trac_schema)
+        return _data.DataItem \
+            .for_table(table, table.schema, trac_schema) \
+            .with_metadata(data_spec.metadata)

     def _load_struct(self, data_copy):

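With this change, a loaded table item carries the metadata from its data spec via a chained `.with_metadata(...)` call, and DataViewFunc above does the same with `root_item.metadata`. A minimal sketch of the fluent pattern, assuming a copy-on-write style; the real `DataItem` lives in `tracdap/rt/_impl/core/data.py` and its implementation is not shown in this diff:

    # Hypothetical sketch of the fluent with_metadata pattern
    import dataclasses as dc
    import typing as tp

    @dc.dataclass(frozen=True)
    class ItemSketch:
        content: str
        metadata: tp.Optional[dict] = None

        def with_metadata(self, metadata: dict) -> "ItemSketch":
            # Return a modified copy, so calls can be chained without mutation
            return dc.replace(self, metadata=metadata)

    item = ItemSketch("table-data").with_metadata({"rows": 100})
    assert item.metadata == {"rows": 100}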
@@ -561,7 +424,7 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):

         # Metadata already exists as data_spec but may not contain schema, row count, file size etc.
         data_spec = self._choose_data_spec(self.node.spec_id, self.node.spec, ctx)
-        data_copy = self._choose_copy(data_spec.data_item, data_spec.storage_def)
+        data_copy = self._choose_copy(data_spec.data_item, data_spec.storage)

         # Do not save empty outputs (optional outputs that were not produced)
         if data_item.is_empty():
@@ -570,10 +433,10 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
         if data_item.object_type == _api.ObjectType.FILE:
             return self._save_file(data_item, data_spec, data_copy)

-        elif data_item.schema_type == _api.SchemaType.TABLE:
+        elif data_item.schema_type == _api.SchemaType.TABLE_SCHEMA:
             return self._save_table(data_item, data_spec, data_copy)

-        elif data_item.schema_type == _api.SchemaType.STRUCT:
+        elif data_item.schema_type == _api.SchemaType.STRUCT_SCHEMA:
             return self._save_struct(data_item, data_spec, data_copy)

         else:
@@ -588,7 +451,7 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
         storage.write_bytes(data_copy.storagePath, data_item.content)

         data_spec = copy.deepcopy(data_spec)
-        data_spec.file_def.size = len(data_item.content)
+        data_spec.definition.size = len(data_item.content)

         return data_spec

@@ -603,7 +466,7 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):

         # Decode options (metadata values) from the storage definition
         options = dict()
-        for opt_key, opt_value in data_spec.storage_def.storageOptions.items():
+        for opt_key, opt_value in data_spec.storage.storageOptions.items():
             options[opt_key] = _types.MetadataCodec.decode_value(opt_value)

         storage = self.storage.get_data_storage(data_copy.storageKey)
@@ -615,8 +478,8 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
         data_spec = copy.deepcopy(data_spec)
         # TODO: Save row count in metadata

-        if data_spec.data_def.schema is None and data_spec.data_def.schemaId is None:
-            data_spec.data_def.schema = _data.DataMapping.arrow_to_trac_schema(data_item.table.schema)
+        if data_spec.definition.schema is None and data_spec.definition.schemaId is None:
+            data_spec.definition.schema = _data.DataMapping.arrow_to_trac_schema(data_item.table.schema)

         return data_spec

@@ -638,38 +501,48 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):

         data_spec = copy.deepcopy(data_spec)

-        if data_spec.data_def.schema is None and data_spec.data_def.schemaId is None:
-            data_spec.data_def.schema = data_item.trac_schema
+        if data_spec.definition.schema is None and data_spec.definition.schemaId is None:
+            data_spec.definition.schema = data_item.trac_schema

         return data_spec

-def _model_def_for_import(import_details: meta.ImportModelJob):
-
-    return meta.ModelDefinition(
-        language=import_details.language,
-        repository=import_details.repository,
-        packageGroup=import_details.packageGroup,
-        package=import_details.package,
-        version=import_details.version,
-        entryPoint=import_details.entryPoint,
-        path=import_details.path)

+# MODEL EXECUTION
+# ---------------

-class ImportModelFunc(NodeFunction[meta.ObjectDefinition]):
+class ImportModelFunc(NodeFunction[GraphOutput]):

     def __init__(self, node: ImportModelNode, models: _models.ModelLoader):
         super().__init__()
         self.node = node
         self._models = models

-    def _execute(self, ctx: NodeContext) -> meta.ObjectDefinition:
+    def _execute(self, ctx: NodeContext) -> GraphOutput:

-        model_stub = _model_def_for_import(self.node.import_details)
+        model_id = self.node.model_id

-        model_class = self._models.load_model_class(self.node.model_scope, model_stub)
+        model_stub = self._build_model_stub(self.node.import_details)
+        model_class = self._models.load_model_class(self.node.import_scope, model_stub)
         model_def = self._models.scan_model(model_stub, model_class)
+        model_obj = _meta.ObjectDefinition(_meta.ObjectType.MODEL, model=model_def)
+
+        model_attrs = [
+            _meta.TagUpdate(_meta.TagOperation.CREATE_OR_REPLACE_ATTR, attr_name, attr_value)
+            for attr_name, attr_value in model_def.staticAttributes.items()]
+
+        return GraphOutput(model_id, model_obj, model_attrs)
+
+    @staticmethod
+    def _build_model_stub(import_details: _meta.ImportModelJob):

-        return meta.ObjectDefinition(meta.ObjectType.MODEL, model=model_def)
+        return _meta.ModelDefinition(
+            language=import_details.language,
+            repository=import_details.repository,
+            packageGroup=import_details.packageGroup,
+            package=import_details.package,
+            version=import_details.version,
+            entryPoint=import_details.entryPoint,
+            path=import_details.path)


 class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
@@ -709,7 +582,7 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
         storage_map = {}

         if self.node.storage_access:
-            write_access = True if self.node.model_def.modelType == meta.ModelType.DATA_EXPORT_MODEL else False
+            write_access = True if self.node.model_def.modelType == _meta.ModelType.DATA_EXPORT_MODEL else False
             for storage_key in self.node.storage_access:
                 if self.storage_manager.has_file_storage(storage_key, external=True):
                     storage_impl = self.storage_manager.get_file_storage(storage_key, external=True)
@@ -729,7 +602,7 @@

         # Run the model against the mapped local context

-        if model_def.modelType in [meta.ModelType.DATA_IMPORT_MODEL, meta.ModelType.DATA_EXPORT_MODEL]:
+        if model_def.modelType in [_meta.ModelType.DATA_IMPORT_MODEL, _meta.ModelType.DATA_EXPORT_MODEL]:
             trac_ctx = _ctx.TracDataContextImpl(
                 self.node.model_def, self.model_class,
                 local_ctx, dynamic_outputs, storage_map,
@@ -741,7 +614,8 @@
                 self.checkout_directory, self.log_provider)

         try:
-            model = self.model_class()
+            model = object.__new__(self.model_class)
+            model.__init__()
             model.run_model(trac_ctx)
         except _ex.ETrac:
             raise
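This hunk splits model construction into two explicit steps. For a plain Python class the result is the same as calling `self.model_class()`, but allocation (`object.__new__`) and the model's own `__init__` now run as separate statements, so a failure inside a model constructor surfaces on its own line; that reading is an inference from the code, the diff itself does not state the motivation. A minimal demonstration:

    # Demonstration: two-step construction is equivalent to cls() for plain classes,
    # but allocation and the user-defined __init__ run as separate, visible steps
    class ExampleModel:
        def __init__(self):
            self.ready = True

    model = object.__new__(ExampleModel)   # step 1: allocate, no user code runs
    assert not hasattr(model, "ready")     # __init__ has not run yet
    model.__init__()                       # step 2: run the model's own constructor
    assert model.ready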
@@ -750,56 +624,152 @@
             msg = f"There was an unhandled error in the model: {str(e)}{details}"
             raise _ex.EModelExec(msg) from e

-        # Check required outputs are present and build the results bundle
-
-        model_name = self.model_class.__name__
+        # Build a result bundle for the defined model outputs
         results: Bundle[_data.DataView] = dict()
-        new_nodes = dict()
-        new_deps = dict()

         for output_name, output_schema in model_def.outputs.items():
+            output: _data.DataView = local_ctx.get(output_name)
+            if (output is None or output.is_empty()) and not output_schema.optional:
+                raise _ex.ERuntimeValidation(f"Missing required output [{output_name}] from model [{self.model_class.__name__}]")
+            results[output_name] = output or _data.DataView.create_empty()
+
+        # Add dynamic outputs to the model result bundle
+        for output_name in dynamic_outputs:
+            output: _data.DataView = local_ctx.get(output_name)
+            if output is None or output.is_empty():
+                raise _ex.ERuntimeValidation(f"No data provided for [{output_name}] from model [{self.model_class.__name__}]")
+            results[output_name] = output
+
+        # Send a graph update to include the dynamic outputs in the job result
+        if any(dynamic_outputs):
+            builder = _graph.GraphBuilder.dynamic(self.node.graph_context)
+            update = builder.build_dynamic_outputs(self.node.id, dynamic_outputs)
+            self.node_callback.send_graph_update(update)
+
+        return results
+

-            result: _data.DataView = local_ctx.get(output_name)
+# RESULTS PROCESSING
+# ------------------

-            if result is None or result.is_empty():
+class JobResultFunc(NodeFunction[_cfg.JobResult]):

-                if not output_schema.optional:
-                    raise _ex.ERuntimeValidation(f"Missing required output [{output_name}] from model [{model_name}]")
+    def __init__(self, node: JobResultNode):
+        super().__init__()
+        self.node = node

-                # Create a placeholder for optional outputs that were not emitted
-                elif result is None:
-                    result = _data.DataView.create_empty()
+    def _execute(self, ctx: NodeContext) -> _cfg.JobResult:

-            results[output_name] = result
+        result_def = _meta.ResultDefinition()
+        result_def.jobId = _util.selector_for(self.node.job_id)

-        if dynamic_outputs:
+        job_result = _cfg.JobResult()
+        job_result.jobId = self.node.job_id
+        job_result.resultId = self.node.result_id
+        job_result.result = result_def

-            for output_name in dynamic_outputs:
+        self._process_named_outputs(self.node.named_outputs, ctx, job_result)
+        self._process_unnamed_outputs(self.node.unnamed_outputs, ctx, job_result)

-                result: _data.DataView = local_ctx.get(output_name)
+        # TODO: Handle individual failed results

-                if result is None or result.is_empty():
-                    raise _ex.ERuntimeValidation(f"No data provided for [{output_name}] from model [{model_name}]")
+        result_def.statusCode = _meta.JobStatusCode.SUCCEEDED

-                results[output_name] = result
+        return job_result

-                result_node_id = NodeId.of(output_name, self.node.id.namespace, _data.DataView)
-                result_node = BundleItemNode(result_node_id, self.node.id, output_name)
+    def _process_named_outputs(self, named_outputs, ctx: NodeContext, job_result: _cfg.JobResult):

-                new_nodes[result_node_id] = result_node
+        for output_name, output_id in named_outputs.items():

-            output_section = _graph.GraphBuilder.build_runtime_outputs(dynamic_outputs, self.node.id.namespace)
-            new_nodes.update(output_section.nodes)
+            output = _ctx_lookup(output_id, ctx)

-            ctx_id = NodeId.of("trac_job_result", self.node.id.namespace, result_type=None)
-            new_deps[ctx_id] = list(_graph.Dependency(nid, _graph.DependencyType.HARD) for nid in output_section.outputs)
+            if output_id.result_type == GraphOutput:
+                self._process_graph_output(output_name, output, job_result)

-        self.node_callback.send_graph_updates(new_nodes, new_deps)
+            elif output_id.result_type == _data.DataSpec:
+                self._process_data_spec(output_name, output, job_result)

-        return results
+            else:
+                raise _ex.EUnexpected()
+
+    def _process_unnamed_outputs(self, unnamed_outputs, ctx: NodeContext, job_result: _cfg.JobResult):
+
+        for output_id in unnamed_outputs:
+
+            output = _ctx_lookup(output_id, ctx)

+            if output_id.result_type == GraphOutput:
+                self._process_graph_output(None, output, job_result)

-class ChildJobFunction(NodeFunction[None]):
+            elif output_id.result_type == _data.DataSpec:
+                self._process_data_spec(None, output, job_result)
+
+            else:
+                raise _ex.EUnexpected()
+
+    @staticmethod
+    def _process_graph_output(output_name: tp.Optional[str], output: GraphOutput, job_result: _cfg.JobResult):
+
+        output_key = _util.object_key(output.objectId)
+
+        job_result.objectIds.append(output.objectId)
+        job_result.objects[output_key] = output.definition
+
+        if output.attrs is not None:
+            job_result.attrs[output_key] = _cfg.JobResultAttrs(output.attrs)
+
+        if output_name is not None:
+            job_result.result.outputs[output_name] = _util.selector_for(output.objectId)
+
+    @staticmethod
+    def _process_data_spec(output_name: tp.Optional[str], data_spec: _data.DataSpec, job_result: _cfg.JobResult):
+
+        # Do not record results for optional outputs that were not produced
+        if data_spec.is_empty():
+            return
+
+        output_id = data_spec.primary_id
+        output_key = _util.object_key(output_id)
+        output_def = data_spec.definition
+
+        if data_spec.object_type == _meta.ObjectType.DATA:
+            output_obj = _meta.ObjectDefinition(data_spec.object_type, data=output_def)
+        elif data_spec.object_type == _meta.ObjectType.FILE:
+            output_obj = _meta.ObjectDefinition(data_spec.object_type, file=output_def)
+        else:
+            raise _ex.EUnexpected()
+
+        storage_id = data_spec.storage_id
+        storage_key = _util.object_key(storage_id)
+        storage_def = data_spec.storage
+        storage_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
+
+        job_result.objectIds.append(output_id)
+        job_result.objectIds.append(storage_id)
+        job_result.objects[output_key] = output_obj
+        job_result.objects[storage_key] = storage_obj
+
+        # Currently, jobs do not ever produce external schemas
+
+        if output_name is not None:
+            job_result.result.outputs[output_name] = _util.selector_for(output_id)
+
+
+class DynamicOutputsFunc(NodeFunction[DynamicOutputsNode]):
+
+    def __init__(self, node: DynamicOutputsNode):
+        super().__init__()
+        self.node = node
+
+    def _execute(self, ctx: NodeContext) -> DynamicOutputsNode:
+        return self.node
+
+
+# MISC NODE TYPES
+# ---------------
+
+
+class ChildJobFunc(NodeFunction[None]):

     def __init__(self, node: ChildJobNode):
         super().__init__()
@@ -866,9 +836,6 @@ class FunctionResolver:
     def resolve_save_data(self, node: SaveDataNode):
         return SaveDataFunc(node, self._storage)

-    def resolve_dynamic_data_spec(self, node: DynamicDataSpecNode):
-        return DynamicDataSpecFunc(node, self._storage)
-
     def resolve_import_model_node(self, node: ImportModelNode):
         return ImportModelFunc(node, self._models)

@@ -884,27 +851,25 @@

     __basic_node_mapping: tp.Dict[Node.__class__, NodeFunction.__class__] = {

-        ContextPushNode: ContextPushFunc,
-        ContextPopNode: ContextPopFunc,
+        NoopNode: NoopFunc,
+        StaticValueNode: StaticValueFunc,
         IdentityNode: IdentityFunc,
         KeyedItemNode: KeyedItemFunc,
+        ContextPushNode: ContextPushFunc,
+        ContextPopNode: ContextPopFunc,
+        DataSpecNode: DataSpecFunc,
         DataViewNode: DataViewFunc,
         DataItemNode: DataItemFunc,
-        BuildJobResultNode: BuildJobResultFunc,
-        DataResultNode: DataResultFunc,
-        StaticValueNode: StaticValueFunc,
-        RuntimeOutputsNode: RuntimeOutputsFunc,
-        ChildJobNode: ChildJobFunction,
+        JobResultNode: JobResultFunc,
+        DynamicOutputsNode: DynamicOutputsFunc,
+        ChildJobNode: ChildJobFunc,
         BundleItemNode: NoopFunc,
-        NoopNode: NoopFunc,
-        RunModelResultNode: NoopFunc
     }

     __node_mapping: tp.Dict[Node.__class__, __ResolveFunc] = {

         LoadDataNode: resolve_load_data,
         SaveDataNode: resolve_save_data,
-        DynamicDataSpecNode: resolve_dynamic_data_spec,
         RunModelNode: resolve_run_model_node,
         ImportModelNode: resolve_import_model_node
     }
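The resolver keeps node-to-function wiring in two tables: `__basic_node_mapping` for functions whose constructor takes only the node, and `__node_mapping` for functions that need extra services (storage, model loader) injected through a resolver method. A minimal sketch of the same dispatch pattern, using hypothetical stand-in classes rather than the real API:

    # Minimal sketch of class-keyed dispatch, not the real FunctionResolver API
    import typing as tp

    class Node: ...
    class NoopNode(Node): ...
    class LoadDataNode(Node): ...

    class NodeFunction: ...
    class NoopFunc(NodeFunction):
        def __init__(self, node: Node): self.node = node
    class LoadDataFunc(NodeFunction):
        def __init__(self, node: Node, storage: object): self.node, self.storage = node, storage

    class Resolver:
        _basic: tp.Dict[type, type] = {NoopNode: NoopFunc}

        def __init__(self, storage: object):
            self._storage = storage

        def resolve(self, node: Node) -> NodeFunction:
            basic = self._basic.get(type(node))
            if basic is not None:
                return basic(node)                        # ctor takes just the node
            if isinstance(node, LoadDataNode):
                return LoadDataFunc(node, self._storage)  # service-injected case
            raise KeyError(f"No function for node type {type(node).__name__}")

    resolver = Resolver(storage=object())
    assert isinstance(resolver.resolve(NoopNode()), NoopFunc)
    assert isinstance(resolver.resolve(LoadDataNode()), LoadDataFunc)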