tracdap-runtime 0.7.0rc1__py3-none-any.whl → 0.8.0b2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (52)
  1. tracdap/rt/_exec/actors.py +5 -4
  2. tracdap/rt/_exec/context.py +166 -74
  3. tracdap/rt/_exec/dev_mode.py +147 -71
  4. tracdap/rt/_exec/engine.py +224 -99
  5. tracdap/rt/_exec/functions.py +122 -80
  6. tracdap/rt/_exec/graph.py +23 -35
  7. tracdap/rt/_exec/graph_builder.py +250 -113
  8. tracdap/rt/_exec/runtime.py +24 -10
  9. tracdap/rt/_exec/server.py +4 -3
  10. tracdap/rt/_impl/config_parser.py +3 -2
  11. tracdap/rt/_impl/data.py +89 -16
  12. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +3 -1
  13. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.pyi +8 -0
  14. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +64 -62
  15. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +16 -2
  16. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +27 -25
  17. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -4
  18. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +3 -3
  19. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.pyi +2 -0
  20. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +4 -4
  21. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.pyi +4 -2
  22. tracdap/rt/_impl/logging.py +195 -0
  23. tracdap/rt/_impl/models.py +11 -8
  24. tracdap/rt/_impl/repos.py +5 -3
  25. tracdap/rt/_impl/schemas.py +2 -2
  26. tracdap/rt/_impl/shim.py +3 -2
  27. tracdap/rt/_impl/static_api.py +53 -33
  28. tracdap/rt/_impl/storage.py +4 -3
  29. tracdap/rt/_impl/util.py +1 -111
  30. tracdap/rt/_impl/validation.py +57 -30
  31. tracdap/rt/_version.py +1 -1
  32. tracdap/rt/api/__init__.py +6 -3
  33. tracdap/rt/api/file_types.py +29 -0
  34. tracdap/rt/api/hook.py +15 -7
  35. tracdap/rt/api/model_api.py +16 -0
  36. tracdap/rt/api/static_api.py +211 -125
  37. tracdap/rt/config/__init__.py +6 -6
  38. tracdap/rt/config/common.py +11 -1
  39. tracdap/rt/config/platform.py +4 -6
  40. tracdap/rt/ext/plugins.py +2 -2
  41. tracdap/rt/launch/launch.py +9 -11
  42. tracdap/rt/metadata/__init__.py +11 -9
  43. tracdap/rt/metadata/file.py +8 -0
  44. tracdap/rt/metadata/job.py +16 -0
  45. tracdap/rt/metadata/model.py +12 -2
  46. tracdap/rt/metadata/object.py +2 -0
  47. tracdap/rt/metadata/object_id.py +2 -0
  48. {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0b2.dist-info}/METADATA +15 -15
  49. {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0b2.dist-info}/RECORD +52 -50
  50. {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0b2.dist-info}/WHEEL +1 -1
  51. {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0b2.dist-info}/LICENSE +0 -0
  52. {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0b2.dist-info}/top_level.txt +0 -0
@@ -16,6 +16,8 @@
  import copy as cp
  import dataclasses as dc
  import enum
+ import io
+ import pathlib
  import typing as tp

  import tracdap.rt.metadata as _meta
@@ -24,8 +26,10 @@ import tracdap.rt.exceptions as _ex
  import tracdap.rt._exec.actors as _actors
  import tracdap.rt._exec.graph_builder as _graph
  import tracdap.rt._exec.functions as _func
- import tracdap.rt._impl.models as _models # noqa
+ import tracdap.rt._impl.config_parser as _cfg_p # noqa
  import tracdap.rt._impl.data as _data # noqa
+ import tracdap.rt._impl.logging as _logging # noqa
+ import tracdap.rt._impl.models as _models # noqa
  import tracdap.rt._impl.storage as _storage # noqa
  import tracdap.rt._impl.util as _util # noqa

@@ -79,18 +83,44 @@ class _EngineContext:
              pending_nodes, active_nodes, succeeded_nodes, failed_nodes)


+ @dc.dataclass
+ class _JobResultSpec:
+
+     save_result: bool = False
+     result_dir: tp.Union[str, pathlib.Path] = None
+     result_format: str = None
+
+
  @dc.dataclass
  class _JobState:

      job_id: _meta.TagHeader
-     actor_id: _actors.ActorId = None
+     log_init: dc.InitVar[tp.Optional[_logging.LogProvider]] = None

+     actor_id: _actors.ActorId = None
      monitors: tp.List[_actors.ActorId] = dc.field(default_factory=list)

      job_config: _cfg.JobConfig = None
      job_result: _cfg.JobResult = None
      job_error: Exception = None

+     parent_key: str = None
+     result_spec: _JobResultSpec = None
+
+     log_buffer: io.BytesIO = None
+     log_provider: _logging.LogProvider = None
+     log: _logging.Logger = None
+
+     def __post_init__(self, log_init):
+
+         if isinstance(log_init, _logging.LogProvider):
+             self.log_provider = log_init
+         else:
+             self.log_buffer = io.BytesIO()
+             self.log_provider = _logging.job_log_provider(self.log_buffer)
+
+         self.log = self.log_provider.logger_for_class(TracEngine)
+

  class TracEngine(_actors.Actor):

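The `_JobState` changes above introduce per-job logging. A top-level job creates a log provider backed by an in-memory `io.BytesIO` buffer (persisted later as `trac_job_log_file`), while a child job is constructed with its parent's provider so both jobs write to the same log. The runtime's `_logging.job_log_provider()` is internal; a minimal sketch of the underlying mechanism, using only the standard `logging` module and hypothetical names:

```python
import io
import logging

def job_log_provider(buffer: io.BytesIO) -> logging.Logger:
    # Hypothetical stand-in for _logging.job_log_provider(): a standalone
    # logger whose records are captured in a per-job byte buffer
    stream = io.TextIOWrapper(buffer, encoding="utf-8", write_through=True)
    handler = logging.StreamHandler(stream)
    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s - %(message)s"))
    logger = logging.Logger("trac.job")  # direct construction keeps it out of the global hierarchy
    logger.setLevel(logging.INFO)
    logger.addHandler(handler)
    return logger

log_buffer = io.BytesIO()
log = job_log_provider(log_buffer)
log.info("Job submitted: [job-1]")

# These buffered bytes are what _save_job_log_file() would write to storage
print(log_buffer.getvalue().decode("utf-8"))
```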
@@ -107,7 +137,7 @@ class TracEngine(_actors.Actor):

          super().__init__()

-         self._log = _util.logger_for_object(self)
+         self._log = _logging.logger_for_object(self)

          self._sys_config = sys_config
          self._models = models
@@ -164,13 +194,17 @@ class TracEngine(_actors.Actor):
              job_result_format: str):

          job_key = _util.object_key(job_config.jobId)
+         job_state = _JobState(job_config.jobId)
+
+         job_state.log.info(f"Job submitted: [{job_key}]")

          result_needed = bool(job_result_dir)
-         result_spec = _graph.JobResultSpec(result_needed, job_result_dir, job_result_format)
+         result_spec = _JobResultSpec(result_needed, job_result_dir, job_result_format)

-         self._log.info(f"Job submitted: [{job_key}]")
+         job_processor = JobProcessor(
+             self._sys_config, self._models, self._storage, job_state.log_provider,
+             job_key, job_config, graph_spec=None)

-         job_processor = JobProcessor(self._models, self._storage, job_key, job_config, result_spec, graph_spec=None)
          job_actor_id = self.actors().spawn(job_processor)

          job_monitor_success = lambda ctx, key, result: self._notify_callback(key, result, None)
@@ -178,24 +212,36 @@ class TracEngine(_actors.Actor):
          job_monitor = JobMonitor(job_key, job_monitor_success, job_monitor_failure)
          job_monitor_id = self.actors().spawn(job_monitor)

-         job_state = _JobState(job_config.jobId)
          job_state.actor_id = job_actor_id
          job_state.monitors.append(job_monitor_id)
          job_state.job_config = job_config
+         job_state.result_spec = result_spec

          self._jobs[job_key] = job_state

      @_actors.Message
-     def submit_child_job(self, child_id: _meta.TagHeader, child_graph: _graph.Graph, monitor_id: _actors.ActorId):
+     def submit_child_job(self, parent_key: str, child_id: _meta.TagHeader, child_graph: _graph.Graph, monitor_id: _actors.ActorId):
+
+         parent_state = self._jobs.get(parent_key)
+
+         # Ignore duplicate messages from the job processor (can happen in unusual error cases)
+         if parent_state is None:
+             self._log.warning(f"Ignoring [submit_child_job] message, parent [{parent_key}] has already completed")
+             return

          child_key = _util.object_key(child_id)

-         child_processor = JobProcessor(self._models, self._storage, child_key, None, None, graph_spec=child_graph) # noqa
+         child_processor = JobProcessor(
+             self._sys_config, self._models, self._storage, parent_state.log_provider,
+             child_key, None, graph_spec=child_graph)
+
          child_actor_id = self.actors().spawn(child_processor)

-         child_state = _JobState(child_id)
+         child_state = _JobState(child_id, parent_state.log_provider)
          child_state.actor_id = child_actor_id
          child_state.monitors.append(monitor_id)
+         child_state.parent_key = parent_key
+         child_state.result_spec = _JobResultSpec(False)  # Do not output separate results for child jobs

          self._jobs[child_key] = child_state

@@ -219,9 +265,9 @@ class TracEngine(_actors.Actor):
              self._log.warning(f"Ignoring [job_succeeded] message, job [{job_key}] has already completed")
              return

-         self._log.info(f"Recording job as successful: {job_key}")
-
          job_state = self._jobs[job_key]
+         job_state.log.info(f"Recording job as successful: {job_key}")
+
          job_state.job_result = job_result

          for monitor_id in job_state.monitors:
@@ -237,11 +283,30 @@ class TracEngine(_actors.Actor):
              self._log.warning(f"Ignoring [job_failed] message, job [{job_key}] has already completed")
              return

-         self._log.error(f"Recording job as failed: {job_key}")
-
          job_state = self._jobs[job_key]
+         job_state.log.error(f"Recording job as failed: {job_key}")
+
          job_state.job_error = error

+         # Create a failed result so there is something to report
+         result_id = job_state.job_config.resultMapping.get("trac_job_result")
+
+         if result_id is not None:
+
+             job_state.job_result = _cfg.JobResult(
+                 jobId=job_state.job_id,
+                 statusCode=_meta.JobStatusCode.FAILED,
+                 statusMessage=str(error))
+
+             result_def = _meta.ResultDefinition()
+             result_def.jobId = _util.selector_for(job_state.job_id)
+             result_def.statusCode = _meta.JobStatusCode.FAILED
+
+             result_key = _util.object_key(result_id)
+             result_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.RESULT, result=result_def)
+
+             job_state.job_result.results[result_key] = result_obj
+
          for monitor_id in job_state.monitors:
              self.actors().send(monitor_id, "job_failed", error)

@@ -256,6 +321,14 @@ class TracEngine(_actors.Actor):

          job_state = self._jobs.get(job_key)

+         # Record output metadata if required (not needed for local runs or when using API server)
+         if job_state.parent_key is None and job_state.result_spec.save_result:
+
+             if "trac_job_log_file" in job_state.job_config.resultMapping:
+                 self._save_job_log_file(job_key, job_state)
+
+             self._save_job_result(job_key, job_state)
+
          # Stop any monitors that were created directly by the engine
          # (Other actors are responsible for stopping their own monitors)
          while job_state.monitors:
@@ -265,9 +338,57 @@ class TracEngine(_actors.Actor):
              self.actors().stop(monitor_id)

          if job_state.actor_id is not None:
-             self.actors().stop(job_state.actor_id )
+             self.actors().stop(job_state.actor_id)
              job_state.actor_id = None

+     def _save_job_log_file(self, job_key: str, job_state: _JobState):
+
+         self._log.info(f"Saving job log file for [{job_key}]")
+
+         # Saving log files could go into a separate actor, perhaps a job monitor along with _save_job_result()
+
+         file_id = job_state.job_config.resultMapping["trac_job_log_file"]
+         storage_id = job_state.job_config.resultMapping["trac_job_log_file:STORAGE"]
+
+         file_type = _meta.FileType("TXT", "text/plain")
+         file_def, storage_def = _graph.GraphBuilder.build_output_file_and_storage(
+             "trac_job_log_file", file_type,
+             self._sys_config, job_state.job_config)
+
+         storage_item = storage_def.dataItems[file_def.dataItem].incarnations[0].copies[0]
+         storage = self._storage.get_file_storage(storage_item.storageKey)
+
+         with storage.write_byte_stream(storage_item.storagePath) as stream:
+             stream.write(job_state.log_buffer.getbuffer())
+             file_def.size = stream.tell()
+
+         result_id = job_state.job_config.resultMapping["trac_job_result"]
+         result_def = job_state.job_result.results[_util.object_key(result_id)].result
+         result_def.logFileId = _util.selector_for(file_id)
+
+         file_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.FILE, file=file_def)
+         storage_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
+
+         job_state.job_result.results[_util.object_key(file_id)] = file_obj
+         job_state.job_result.results[_util.object_key(storage_id)] = storage_obj
+
+     def _save_job_result(self, job_key: str, job_state: _JobState):
+
+         self._log.info(f"Saving job result for [{job_key}]")
+
+         # It might be better to abstract reporting of results, job status etc., perhaps with a job monitor
+
+         if job_state.result_spec.save_result:
+
+             result_format = job_state.result_spec.result_format
+             result_dir = job_state.result_spec.result_dir
+             result_file = f"job_result_{job_key}.{result_format}"
+             result_path = pathlib.Path(result_dir).joinpath(result_file)
+
+             with open(result_path, "xt") as result_stream:
+                 result_content = _cfg_p.ConfigQuoter.quote(job_state.job_result, result_format)
+                 result_stream.write(result_content)
+
      def _get_job_info(self, job_key: str, details: bool = False) -> tp.Optional[_cfg.JobResult]:

          job_state = self._jobs.get(job_key)
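Note that `_save_job_result()` above opens the result file with mode `"xt"`: exclusive creation in text mode, so if a result file already exists the write fails with `FileExistsError` rather than silently overwriting it. The same pattern in isolation (the function name and signature are illustrative, not part of the runtime API):

```python
import pathlib

def save_job_result(result_dir: str, job_key: str, content: str, result_format: str = "json") -> pathlib.Path:
    result_path = pathlib.Path(result_dir).joinpath(f"job_result_{job_key}.{result_format}")
    # "x" creates the file exclusively (raises FileExistsError if it exists), "t" selects text mode
    with open(result_path, "xt") as stream:
        stream.write(content)
    return result_path
```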
@@ -336,19 +457,25 @@ class JobProcessor(_actors.Actor):
      """

      def __init__(
-             self, models: _models.ModelLoader, storage: _storage.StorageManager,
-             job_key: str, job_config: _cfg.JobConfig, result_spec: _graph.JobResultSpec,
-             graph_spec: tp.Optional[_graph.Graph]):
+             self, sys_config: _cfg.RuntimeConfig,
+             models: _models.ModelLoader, storage: _storage.StorageManager, log_provider: _logging.LogProvider,
+             job_key: str, job_config: tp.Optional[_cfg.JobConfig], graph_spec: tp.Optional[_graph.Graph]):

          super().__init__()
+
+         # Either a job config or a pre-built spec is required
+         if not job_config and not graph_spec:
+             raise _ex.EUnexpected()
+
          self.job_key = job_key
          self.job_config = job_config
-         self.result_spec = result_spec
          self.graph_spec = graph_spec
+         self._sys_config = sys_config
          self._models = models
          self._storage = storage
-         self._resolver = _func.FunctionResolver(models, storage)
-         self._log = _util.logger_for_object(self)
+         self._log_provider = log_provider
+         self._resolver = _func.FunctionResolver(models, storage, log_provider)
+         self._log = log_provider.logger_for_object(self)

      def on_start(self):

@@ -358,7 +485,7 @@ class JobProcessor(_actors.Actor):
          if self.graph_spec is not None:
              self.actors().send(self.actors().id, "build_graph_succeeded", self.graph_spec)
          else:
-             self.actors().spawn(GraphBuilder(self.job_config, self.result_spec))
+             self.actors().spawn(GraphBuilder(self._sys_config, self.job_config, self._log_provider))

      def on_stop(self):

@@ -396,14 +523,14 @@ class JobProcessor(_actors.Actor):
          # Add all the nodes as pending nodes to start
          graph.pending_nodes.update(graph.nodes.keys())

-         self.actors().spawn(FunctionResolver(self._resolver, graph))
+         self.actors().spawn(FunctionResolver(self._resolver, self._log_provider, graph))
          if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
              self.actors().stop(self.actors().sender)

      @_actors.Message
      def resolve_functions_succeeded(self, graph: _EngineContext):

-         self.actors().spawn(GraphProcessor(graph, self._resolver))
+         self.actors().spawn(GraphProcessor(graph, self._resolver, self._log_provider))
          if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
              self.actors().stop(self.actors().sender)

@@ -426,11 +553,12 @@ class GraphBuilder(_actors.Actor):
      GraphBuilder is a worker (actor) to wrap the GraphBuilder logic from graph_builder.py
      """

-     def __init__(self, job_config: _cfg.JobConfig, result_spec: _graph.JobResultSpec):
+     def __init__(self, sys_config: _cfg.RuntimeConfig, job_config: _cfg.JobConfig, log_provider: _logging.LogProvider):
+
          super().__init__()
+         self.sys_config = sys_config
          self.job_config = job_config
-         self.result_spec = result_spec
-         self._log = _util.logger_for_object(self)
+         self._log = log_provider.logger_for_object(self)

      def on_start(self):
          self.build_graph(self, self.job_config)
@@ -440,8 +568,7 @@ class GraphBuilder(_actors.Actor):

          self._log.info("Building execution graph")

-         # TODO: Get sys config, or find a way to pass storage settings
-         graph_builder = _graph.GraphBuilder(job_config, self.result_spec)
+         graph_builder = _graph.GraphBuilder(self.sys_config, job_config)
          graph_spec = graph_builder.build_job(job_config.job)

          self.actors().reply("build_graph_succeeded", graph_spec)
@@ -450,14 +577,14 @@ class GraphBuilder(_actors.Actor):
  class FunctionResolver(_actors.Actor):

      """
-     GraphResolver is a worker (actors) to wrap the FunctionResolver logic in functions.py
+     FunctionResolver is a worker (actors) to wrap the FunctionResolver logic in functions.py
      """

-     def __init__(self, resolver: _func.FunctionResolver, graph: _EngineContext):
+     def __init__(self, resolver: _func.FunctionResolver, log_provider: _logging.LogProvider, graph: _EngineContext):
          super().__init__()
          self.graph = graph
          self._resolver = resolver
-         self._log = _util.logger_for_object(self)
+         self._log = log_provider.logger_for_object(self)

      def on_start(self):
          self.resolve_functions(self, self.graph)
@@ -486,13 +613,15 @@ class GraphProcessor(_actors.Actor):
      Once all running nodes are stopped, an error is reported to the parent
      """

-     def __init__(self, graph: _EngineContext, resolver: _func.FunctionResolver):
+     def __init__(self, graph: _EngineContext, resolver: _func.FunctionResolver, log_provider: _logging.LogProvider):
          super().__init__()
          self.graph = graph
          self.root_id_ = graph.root_id
          self.processors: tp.Dict[NodeId, _actors.ActorId] = dict()
          self._resolver = resolver
-         self._log = _util.logger_for_object(self)
+         self._log = log_provider.logger_for_object(self)
+         self._graph_logger = GraphLogger(log_provider)
+         self._node_logger = NodeLogger(log_provider)

      def on_start(self):

@@ -513,7 +642,7 @@ class GraphProcessor(_actors.Actor):
          for node_id, node in graph.nodes.items():
              if node_id in graph.succeeded_nodes and not self._is_required_node(node, graph):
                  node = processed_graph.nodes.pop(node_id)
-                 NodeLogger.log_node_evict(node)
+                 self._node_logger.log_node_evict(node)
                  del node

          pending_nodes = cp.copy(graph.pending_nodes)
@@ -538,13 +667,13 @@ class GraphProcessor(_actors.Actor):
              # There is scope for a much more sophisticated approach, with prioritized scheduling

              if isinstance(node.node, _graph.ChildJobNode):
-                 processor = ChildJobNodeProcessor(processed_graph, node)
+                 processor = ChildJobNodeProcessor(processed_graph, node, self._node_logger)
              elif isinstance(node.node, _graph.RunModelNode) or isinstance(node.node, _graph.ImportModelNode):
-                 processor = ModelNodeProcessor(processed_graph, node)
+                 processor = ModelNodeProcessor(processed_graph, node, self._node_logger)
              elif isinstance(node.node, _graph.LoadDataNode) or isinstance(node.node, _graph.SaveDataNode):
-                 processor = DataNodeProcessor(processed_graph, node)
+                 processor = DataNodeProcessor(processed_graph, node, self._node_logger)
              else:
-                 processor = NodeProcessor(processed_graph, node)
+                 processor = NodeProcessor(processed_graph, node, self._node_logger)

              # New nodes can be launched with the updated graph
              # Anything that was pruned is not needed by the new node
@@ -612,7 +741,7 @@ class GraphProcessor(_actors.Actor):
          new_graph.pending_nodes = cp.copy(new_graph.pending_nodes)

          for node_id, node in new_nodes.items():
-             GraphLogger.log_node_add(node)
+             self._graph_logger.log_node_add(node)
              node_func = self._resolver.resolve_node(node)
              new_node = _EngineNode(node, node_func)
              new_graph.nodes[node_id] = new_node
@@ -622,7 +751,7 @@ class GraphProcessor(_actors.Actor):
              engine_node = cp.copy(new_graph.nodes[node_id])
              engine_node.dependencies = cp.copy(engine_node.dependencies)
              for dep in deps:
-                 GraphLogger.log_dependency_add(node_id, dep.node_id)
+                 self._graph_logger.log_dependency_add(node_id, dep.node_id)
                  engine_node.dependencies[dep.node_id] = dep.dependency_type
              new_graph.nodes[node_id] = engine_node

@@ -782,11 +911,12 @@ class NodeProcessor(_actors.Actor):

      __NONE_TYPE = type(None)

-     def __init__(self, graph: _EngineContext, node: _EngineNode):
+     def __init__(self, graph: _EngineContext, node: _EngineNode, node_logger: "NodeLogger"):
          super().__init__()
          self.graph = graph
          self.node = node
          self.node_id = node.node.id
+         self.node_logger = node_logger


      def on_start(self):
@@ -821,7 +951,7 @@ class NodeProcessor(_actors.Actor):

          try:

-             NodeLogger.log_node_start(self.node)
+             self.node_logger.log_node_start(self.node)

              # Context contains only node states available when the context is set up
              ctx = NodeContextImpl(self.graph.nodes)
@@ -834,13 +964,13 @@ class NodeProcessor(_actors.Actor):

              self._check_result_type(result)

-             NodeLogger.log_node_succeeded(self.node)
+             self.node_logger.log_node_succeeded(self.node)

              self.actors().send_parent("node_succeeded", self.node_id, result)

          except Exception as e:

-             NodeLogger.log_node_failed(self.node, e)
+             self.node_logger.log_node_failed(self.node, e)

              self.actors().send_parent("node_failed", self.node_id, e)

@@ -896,28 +1026,29 @@ class NodeProcessor(_actors.Actor):

  class ModelNodeProcessor(NodeProcessor):

-     def __init__(self, graph: _EngineContext, node: _EngineNode):
-         super().__init__(graph, node)
+     def __init__(self, graph: _EngineContext, node: _EngineNode, node_logger: "NodeLogger"):
+         super().__init__(graph, node, node_logger)


  class DataNodeProcessor(NodeProcessor):

-     def __init__(self, graph: _EngineContext, node: _EngineNode):
-         super().__init__(graph, node)
+     def __init__(self, graph: _EngineContext, node: _EngineNode, node_logger: "NodeLogger"):
+         super().__init__(graph, node, node_logger)


  class ChildJobNodeProcessor(NodeProcessor):

-     def __init__(self, graph: _EngineContext, node: _EngineNode):
-         super().__init__(graph, node)
+     def __init__(self, graph: _EngineContext, node: _EngineNode, node_logger: "NodeLogger"):
+         super().__init__(graph, node, node_logger)

      @_actors.Message
      def evaluate_node(self):

-         NodeLogger.log_node_start(self.node)
+         self.node_logger.log_node_start(self.node)

          job_id = self.node.node.job_id # noqa
          job_key = _util.object_key(job_id)
+         parent_key = self.graph.job_key

          node_id = self.actors().id

@@ -932,21 +1063,21 @@ class ChildJobNodeProcessor(NodeProcessor):

          graph_spec: _graph.Graph = self.node.node.graph # noqa

-         self.actors().send(self.graph.engine_id, "submit_child_job", job_id, graph_spec, monitor_id)
+         self.actors().send(self.graph.engine_id, "submit_child_job", parent_key, job_id, graph_spec, monitor_id)

      @_actors.Message
      def child_job_succeeded(self, job_result: _cfg.JobResult):

          self._check_result_type(job_result)

-         NodeLogger.log_node_succeeded(self.node)
+         self.node_logger.log_node_succeeded(self.node)

          self.actors().send_parent("node_succeeded", self.node_id, job_result)

      @_actors.Message
      def child_job_failed(self, job_error: Exception):

-         NodeLogger.log_node_failed(self.node, job_error)
+         self.node_logger.log_node_failed(self.node, job_error)

          self.actors().send_parent("node_failed", self.node_id, job_error)

@@ -957,23 +1088,22 @@ class GraphLogger:
      Log the activity of the GraphProcessor
      """

-     _log = _util.logger_for_class(GraphProcessor)
+     def __init__(self, log_provider: _logging.LogProvider):
+         self._log = log_provider.logger_for_class(GraphProcessor)

-     @classmethod
-     def log_node_add(cls, node: _graph.Node):
+     def log_node_add(self, node: _graph.Node):

          node_name = node.id.name
          namespace = node.id.namespace

-         cls._log.info(f"ADD {cls._func_type(node)} [{node_name}] / {namespace}")
+         self._log.info(f"ADD {self._func_type(node)} [{node_name}] / {namespace}")

-     @classmethod
-     def log_dependency_add(cls, node_id: NodeId, dep_id: NodeId):
+     def log_dependency_add(self, node_id: NodeId, dep_id: NodeId):

          if node_id.namespace == dep_id.namespace:
-             cls._log.info(f"ADD DEPENDENCY [{node_id.name}] -> [{dep_id.name}] / {node_id.namespace}")
+             self._log.info(f"ADD DEPENDENCY [{node_id.name}] -> [{dep_id.name}] / {node_id.namespace}")
          else:
-             cls._log.info(f"ADD DEPENDENCY [{node_id.name}] / {node_id.namespace} -> [{dep_id.name}] / {dep_id.namespace}")
+             self._log.info(f"ADD DEPENDENCY [{node_id.name}] / {node_id.namespace} -> [{dep_id.name}] / {dep_id.namespace}")

      @classmethod
      def _func_type(cls, node: _graph.Node):
@@ -990,7 +1120,8 @@ class NodeLogger:

      # Separate out the logic for logging nodes, so the NodeProcessor itself stays a bit cleaner

-     _log = _util.logger_for_class(NodeProcessor)
+     def __init__(self, log_provider: _logging.LogProvider):
+         self._log = log_provider.logger_for_class(NodeProcessor)

      class LoggingType(enum.Enum):
          DEFAULT = 0
@@ -999,81 +1130,75 @@ class NodeLogger:
          SIMPLE_MAPPING = 3
          MODEL = 4

-     @classmethod
-     def log_node_start(cls, node: _EngineNode):
+     def log_node_start(self, node: _EngineNode):

-         logging_type = cls._logging_type(node)
+         logging_type = self._logging_type(node)
          node_name = node.node.id.name
          namespace = node.node.id.namespace

-         if logging_type == cls.LoggingType.STATIC_VALUE:
-             cls._log.info(f"SET {cls._value_type(node)} [{node_name}] / {namespace}")
+         if logging_type == self.LoggingType.STATIC_VALUE:
+             self._log.info(f"SET {self._value_type(node)} [{node_name}] / {namespace}")

-         elif logging_type in [cls.LoggingType.SIMPLE_MAPPING]:
-             cls._log.info(f"MAP {cls._value_type(node)} [{cls._mapping_source(node)}] -> [{node_name}] / {namespace}")
+         elif logging_type in [self.LoggingType.SIMPLE_MAPPING]:
+             self._log.info(f"MAP {self._value_type(node)} [{self._mapping_source(node)}] -> [{node_name}] / {namespace}")

          else:
-             cls._log.info(f"START {cls._func_type(node)} [{node_name}] / {namespace}")
+             self._log.info(f"START {self._func_type(node)} [{node_name}] / {namespace}")

-     @classmethod
-     def log_node_succeeded(cls, node: _EngineNode):
+     def log_node_succeeded(self, node: _EngineNode):

-         logging_type = cls._logging_type(node)
+         logging_type = self._logging_type(node)
          node_name = node.node.id.name
          namespace = node.node.id.namespace

-         if logging_type in [cls.LoggingType.STATIC_VALUE, cls.LoggingType.SIMPLE_MAPPING]:
+         if logging_type in [self.LoggingType.STATIC_VALUE, self.LoggingType.SIMPLE_MAPPING]:
              return

-         if logging_type == cls.LoggingType.PUSH_POP:
-             cls._log_push_pop_node_details(node.node) # noqa
+         if logging_type == self.LoggingType.PUSH_POP:
+             self._log_push_pop_node_details(node.node) # noqa

-         if logging_type == cls.LoggingType.MODEL:
-             cls._log_model_node_details(node.node) # noqa
+         if logging_type == self.LoggingType.MODEL:
+             self._log_model_node_details(node.node) # noqa

-         cls._log.info(f"DONE {cls._func_type(node)} [{node_name}] / {namespace}")
+         self._log.info(f"DONE {self._func_type(node)} [{node_name}] / {namespace}")

-     @classmethod
-     def log_node_failed(cls, node: _EngineNode, e: Exception):
+     def log_node_failed(self, node: _EngineNode, e: Exception):

          node_name = node.node.id.name
          namespace = node.node.id.namespace

-         cls._log.error(f"FAILED {cls._func_type(node)} [{node_name}] / {namespace}")
-         cls._log.exception(e)
+         self._log.error(f"FAILED {self._func_type(node)} [{node_name}] / {namespace}")
+         self._log.exception(e)

-     @classmethod
-     def log_node_evict(cls, node: _EngineNode):
+     def log_node_evict(self, node: _EngineNode):

-         logging_type = cls._logging_type(node)
+         logging_type = self._logging_type(node)
          node_name = node.node.id.name
          namespace = node.node.id.namespace

-         if logging_type in [cls.LoggingType.STATIC_VALUE, cls.LoggingType.SIMPLE_MAPPING]:
+         if logging_type in [self.LoggingType.STATIC_VALUE, self.LoggingType.SIMPLE_MAPPING]:
              return

-         cls._log.info(f"EVICT {cls._func_type(node)} [{node_name}] / {namespace}")
+         self._log.info(f"EVICT {self._func_type(node)} [{node_name}] / {namespace}")

-     @classmethod
-     def _log_push_pop_node_details(cls, node: tp.Union[_graph.ContextPushNode, _graph.ContextPopNode]):
+     def _log_push_pop_node_details(self, node: tp.Union[_graph.ContextPushNode, _graph.ContextPopNode]):

          push_or_pop = "PUSH" if isinstance(node, _graph.ContextPushNode) else "POP"
          direction = "->" if isinstance(node, _graph.ContextPushNode) else "<-"

          for inner_id, outer_id in node.mapping.items():
-             item_type = cls._type_str(inner_id.result_type)
+             item_type = self._type_str(inner_id.result_type)
              msg = f"{push_or_pop} {item_type} [{outer_id.name}] {direction} [{inner_id.name}] / {node.id.namespace}"
-             cls._log.info(msg)
+             self._log.info(msg)

-     @classmethod
-     def _log_model_node_details(cls, node: _graph.RunModelNode):
+     def _log_model_node_details(self, node: _graph.RunModelNode):

-         cls._type_str(_data.DataView)
+         self._type_str(_data.DataView)

          for output in node.model_def.outputs:
-             result_type = cls._type_str(_data.DataView)
+             result_type = self._type_str(_data.DataView)
              msg = f"RESULT {result_type} [{output}] / {node.bundle_namespace}"
-             cls._log.info(msg)
+             self._log.info(msg)

      @classmethod
      def _logging_type(cls, node: _EngineNode) -> LoggingType:
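The `GraphLogger` and `NodeLogger` changes in the hunks above all follow one pattern: class-level loggers and `@classmethod` methods become instance state built from an injected log provider, so that output from concurrently running jobs is routed to each job's own log. A condensed sketch of the refactor, with the provider interface assumed rather than taken from the runtime:

```python
import logging
import typing as tp

class LogProvider(tp.Protocol):
    # Assumed shape of the runtime's internal _logging.LogProvider
    def logger_for_class(self, clazz: type) -> logging.Logger: ...

class GraphLogger:

    # Before: one logger shared by every job in the process
    #     _log = logging.getLogger("GraphProcessor")
    #
    # After: each GraphProcessor builds a GraphLogger from the per-job
    # provider, so records stay attached to the job that produced them
    def __init__(self, log_provider: LogProvider):
        self._log = log_provider.logger_for_class(GraphLogger)

    def log_node_add(self, node_name: str, namespace: str):
        self._log.info(f"ADD NODE [{node_name}] / {namespace}")
```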