tracdap-runtime 0.6.5__py3-none-any.whl → 0.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_exec/context.py +272 -105
- tracdap/rt/_exec/dev_mode.py +231 -138
- tracdap/rt/_exec/engine.py +217 -59
- tracdap/rt/_exec/functions.py +25 -1
- tracdap/rt/_exec/graph.py +9 -0
- tracdap/rt/_exec/graph_builder.py +295 -198
- tracdap/rt/_exec/runtime.py +7 -5
- tracdap/rt/_impl/config_parser.py +11 -4
- tracdap/rt/_impl/data.py +278 -167
- tracdap/rt/_impl/ext/__init__.py +13 -0
- tracdap/rt/_impl/ext/sql.py +116 -0
- tracdap/rt/_impl/ext/storage.py +57 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +62 -54
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +37 -2
- tracdap/rt/_impl/static_api.py +24 -11
- tracdap/rt/_impl/storage.py +2 -2
- tracdap/rt/_impl/util.py +10 -0
- tracdap/rt/_impl/validation.py +66 -13
- tracdap/rt/_plugins/storage_sql.py +417 -0
- tracdap/rt/_plugins/storage_sql_dialects.py +117 -0
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/experimental.py +79 -32
- tracdap/rt/api/hook.py +10 -0
- tracdap/rt/metadata/__init__.py +4 -0
- tracdap/rt/metadata/job.py +45 -0
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/METADATA +3 -1
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/RECORD +30 -25
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/top_level.txt +0 -0
tracdap/rt/_exec/engine.py
CHANGED
@@ -39,8 +39,9 @@ class _EngineNode:
|
|
39
39
|
"""
|
40
40
|
|
41
41
|
node: _graph.Node
|
42
|
-
dependencies: tp.Dict[NodeId, _graph.DependencyType]
|
43
42
|
function: tp.Optional[_func.NodeFunction] = None
|
43
|
+
|
44
|
+
dependencies: tp.Dict[NodeId, _graph.DependencyType] = dc.field(default_factory=dict)
|
44
45
|
complete: bool = False
|
45
46
|
result: tp.Optional[tp.Any] = None
|
46
47
|
error: tp.Optional[str] = None
|
@@ -57,21 +58,35 @@ class _EngineContext:
|
|
57
58
|
Represents the state of an execution graph being processed by the TRAC engine
|
58
59
|
"""
|
59
60
|
|
61
|
+
engine_id: _actors.ActorId
|
62
|
+
job_key: str
|
63
|
+
root_id: NodeId
|
64
|
+
|
60
65
|
nodes: tp.Dict[NodeId, _EngineNode]
|
61
66
|
pending_nodes: tp.Set[NodeId] = dc.field(default_factory=set)
|
62
67
|
active_nodes: tp.Set[NodeId] = dc.field(default_factory=set)
|
63
68
|
succeeded_nodes: tp.Set[NodeId] = dc.field(default_factory=set)
|
64
69
|
failed_nodes: tp.Set[NodeId] = dc.field(default_factory=set)
|
65
70
|
|
71
|
+
def with_updates(
|
72
|
+
self, nodes,
|
73
|
+
pending_nodes, active_nodes,
|
74
|
+
succeeded_nodes, failed_nodes) -> "_EngineContext":
|
75
|
+
|
76
|
+
return _EngineContext(
|
77
|
+
self.engine_id, self.job_key, self.root_id, nodes,
|
78
|
+
pending_nodes, active_nodes, succeeded_nodes, failed_nodes)
|
79
|
+
|
66
80
|
|
67
81
|
@dc.dataclass
|
68
82
|
class _JobState:
|
69
83
|
|
70
84
|
job_id: _meta.TagHeader
|
71
|
-
job_config: _cfg.JobConfig
|
72
|
-
|
73
85
|
actor_id: _actors.ActorId = None
|
74
86
|
|
87
|
+
monitors: tp.List[_actors.ActorId] = dc.field(default_factory=list)
|
88
|
+
|
89
|
+
job_config: _cfg.JobConfig = None
|
75
90
|
job_result: _cfg.JobResult = None
|
76
91
|
job_error: Exception = None
|
77
92
|
|
@@ -154,14 +169,35 @@ class TracEngine(_actors.Actor):
|
|
154
169
|
|
155
170
|
self._log.info(f"Job submitted: [{job_key}]")
|
156
171
|
|
157
|
-
job_processor = JobProcessor(job_key, job_config, result_spec,
|
172
|
+
job_processor = JobProcessor(self._models, self._storage, job_key, job_config, result_spec, graph_spec=None)
|
158
173
|
job_actor_id = self.actors().spawn(job_processor)
|
159
174
|
|
160
|
-
|
175
|
+
job_monitor_success = lambda ctx, key, result: self._notify_callback(key, result, None)
|
176
|
+
job_monitor_failure = lambda ctx, key, error: self._notify_callback(key, None, error)
|
177
|
+
job_monitor = JobMonitor(job_key, job_monitor_success, job_monitor_failure)
|
178
|
+
job_monitor_id = self.actors().spawn(job_monitor)
|
179
|
+
|
180
|
+
job_state = _JobState(job_config.jobId)
|
161
181
|
job_state.actor_id = job_actor_id
|
182
|
+
job_state.monitors.append(job_monitor_id)
|
183
|
+
job_state.job_config = job_config
|
162
184
|
|
163
185
|
self._jobs[job_key] = job_state
|
164
186
|
|
187
|
+
@_actors.Message
|
188
|
+
def submit_child_job(self, child_id: _meta.TagHeader, child_graph: _graph.Graph, monitor_id: _actors.ActorId):
|
189
|
+
|
190
|
+
child_key = _util.object_key(child_id)
|
191
|
+
|
192
|
+
child_processor = JobProcessor(self._models, self._storage, child_key, None, None, graph_spec=child_graph) # noqa
|
193
|
+
child_actor_id = self.actors().spawn(child_processor)
|
194
|
+
|
195
|
+
child_state = _JobState(child_id)
|
196
|
+
child_state.actor_id = child_actor_id
|
197
|
+
child_state.monitors.append(monitor_id)
|
198
|
+
|
199
|
+
self._jobs[child_key] = child_state
|
200
|
+
|
165
201
|
@_actors.Message
|
166
202
|
def get_job_list(self):
|
167
203
|
|
@@ -184,11 +220,13 @@ class TracEngine(_actors.Actor):
|
|
184
220
|
|
185
221
|
self._log.info(f"Recording job as successful: {job_key}")
|
186
222
|
|
187
|
-
self._jobs[job_key]
|
188
|
-
|
223
|
+
job_state = self._jobs[job_key]
|
224
|
+
job_state.job_result = job_result
|
225
|
+
|
226
|
+
for monitor_id in job_state.monitors:
|
227
|
+
self.actors().send(monitor_id, "job_succeeded", job_result)
|
189
228
|
|
190
|
-
|
191
|
-
self._notify_callback(job_key, job_result, None)
|
229
|
+
self._finalize_job(job_key)
|
192
230
|
|
193
231
|
@_actors.Message
|
194
232
|
def job_failed(self, job_key: str, error: Exception):
|
@@ -200,11 +238,13 @@ class TracEngine(_actors.Actor):
|
|
200
238
|
|
201
239
|
self._log.error(f"Recording job as failed: {job_key}")
|
202
240
|
|
203
|
-
self._jobs[job_key]
|
204
|
-
|
241
|
+
job_state = self._jobs[job_key]
|
242
|
+
job_state.job_error = error
|
243
|
+
|
244
|
+
for monitor_id in job_state.monitors:
|
245
|
+
self.actors().send(monitor_id, "job_failed", error)
|
205
246
|
|
206
|
-
|
207
|
-
self._notify_callback(job_key, None, error)
|
247
|
+
self._finalize_job(job_key)
|
208
248
|
|
209
249
|
def _finalize_job(self, job_key: str):
|
210
250
|
|
@@ -214,10 +254,17 @@ class TracEngine(_actors.Actor):
|
|
214
254
|
# For now each instance of the runtime only processes one job so no need to worry
|
215
255
|
|
216
256
|
job_state = self._jobs.get(job_key)
|
217
|
-
job_actor_id = job_state.actor_id if job_state is not None else None
|
218
257
|
|
219
|
-
|
220
|
-
|
258
|
+
# Stop any monitors that were created directly by the engine
|
259
|
+
# (Other actors are responsible for stopping their own monitors)
|
260
|
+
while job_state.monitors:
|
261
|
+
monitor_id = job_state.monitors.pop()
|
262
|
+
monitor_parent = monitor_id[:monitor_id.rfind('/')]
|
263
|
+
if self.actors().id == monitor_parent:
|
264
|
+
self.actors().stop(monitor_id)
|
265
|
+
|
266
|
+
if job_state.actor_id is not None:
|
267
|
+
self.actors().stop(job_state.actor_id )
|
221
268
|
job_state.actor_id = None
|
222
269
|
|
223
270
|
def _get_job_info(self, job_key: str, details: bool = False) -> tp.Optional[_cfg.JobResult]:
|
@@ -251,6 +298,35 @@ class TracEngine(_actors.Actor):
|
|
251
298
|
return job_result
|
252
299
|
|
253
300
|
|
301
|
+
class JobMonitor(_actors.Actor):
|
302
|
+
|
303
|
+
def __init__(
|
304
|
+
self, job_key: str,
|
305
|
+
success_func: tp.Callable[[_actors.ActorContext, str, _cfg.JobResult], None],
|
306
|
+
failure_func: tp.Callable[[_actors.ActorContext, str, Exception], None]):
|
307
|
+
|
308
|
+
super().__init__()
|
309
|
+
self._job_key = job_key
|
310
|
+
self._success_func = success_func
|
311
|
+
self._failure_func = failure_func
|
312
|
+
self._signal_sent = False
|
313
|
+
|
314
|
+
@_actors.Message
|
315
|
+
def job_succeeded(self, job_result: _cfg.JobResult):
|
316
|
+
self._success_func(self.actors(), self._job_key, job_result)
|
317
|
+
self._signal_sent = True
|
318
|
+
|
319
|
+
@_actors.Message
|
320
|
+
def job_failed(self, error: Exception):
|
321
|
+
self._failure_func(self.actors(), self._job_key, error)
|
322
|
+
self._signal_sent = True
|
323
|
+
|
324
|
+
def on_stop(self):
|
325
|
+
if not self._signal_sent:
|
326
|
+
error = _ex.ETracInternal(f"No result was received for job [{self._job_key}]")
|
327
|
+
self._failure_func(self.actors(), self._job_key, error)
|
328
|
+
|
329
|
+
|
254
330
|
class JobProcessor(_actors.Actor):
|
255
331
|
|
256
332
|
"""
|
@@ -259,26 +335,32 @@ class JobProcessor(_actors.Actor):
|
|
259
335
|
"""
|
260
336
|
|
261
337
|
def __init__(
|
262
|
-
self,
|
263
|
-
result_spec: _graph.JobResultSpec,
|
264
|
-
|
265
|
-
storage: _storage.StorageManager):
|
338
|
+
self, models: _models.ModelLoader, storage: _storage.StorageManager,
|
339
|
+
job_key: str, job_config: _cfg.JobConfig, result_spec: _graph.JobResultSpec,
|
340
|
+
graph_spec: tp.Optional[_graph.Graph]):
|
266
341
|
|
267
342
|
super().__init__()
|
268
343
|
self.job_key = job_key
|
269
344
|
self.job_config = job_config
|
270
345
|
self.result_spec = result_spec
|
346
|
+
self.graph_spec = graph_spec
|
271
347
|
self._models = models
|
272
348
|
self._storage = storage
|
273
349
|
self._resolver = _func.FunctionResolver(models, storage)
|
274
350
|
self._log = _util.logger_for_object(self)
|
275
351
|
|
276
352
|
def on_start(self):
|
353
|
+
|
277
354
|
self._log.info(f"Starting job [{self.job_key}]")
|
278
355
|
self._models.create_scope(self.job_key)
|
279
|
-
|
356
|
+
|
357
|
+
if self.graph_spec is not None:
|
358
|
+
self.actors().send(self.actors().id, "build_graph_succeeded", self.graph_spec)
|
359
|
+
else:
|
360
|
+
self.actors().spawn(GraphBuilder(self.job_config, self.result_spec))
|
280
361
|
|
281
362
|
def on_stop(self):
|
363
|
+
|
282
364
|
self._log.info(f"Cleaning up job [{self.job_key}]")
|
283
365
|
self._models.destroy_scope(self.job_key)
|
284
366
|
|
@@ -303,9 +385,26 @@ class JobProcessor(_actors.Actor):
|
|
303
385
|
return super().on_signal(signal)
|
304
386
|
|
305
387
|
@_actors.Message
|
306
|
-
def
|
307
|
-
|
308
|
-
|
388
|
+
def build_graph_succeeded(self, graph_spec: _graph.Graph):
|
389
|
+
|
390
|
+
# Build a new engine context graph from the graph spec
|
391
|
+
engine_id = self.actors().parent
|
392
|
+
nodes = dict((node_id, _EngineNode(node)) for node_id, node in graph_spec.nodes.items())
|
393
|
+
graph = _EngineContext(engine_id, self.job_key, graph_spec.root_id, nodes)
|
394
|
+
|
395
|
+
# Add all the nodes as pending nodes to start
|
396
|
+
graph.pending_nodes.update(graph.nodes.keys())
|
397
|
+
|
398
|
+
self.actors().spawn(FunctionResolver(self._resolver, graph))
|
399
|
+
if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
|
400
|
+
self.actors().stop(self.actors().sender)
|
401
|
+
|
402
|
+
@_actors.Message
|
403
|
+
def resolve_functions_succeeded(self, graph: _EngineContext):
|
404
|
+
|
405
|
+
self.actors().spawn(GraphProcessor(graph, self._resolver))
|
406
|
+
if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
|
407
|
+
self.actors().stop(self.actors().sender)
|
309
408
|
|
310
409
|
@_actors.Message
|
311
410
|
def job_succeeded(self, job_result: _cfg.JobResult):
|
@@ -323,44 +422,54 @@ class JobProcessor(_actors.Actor):
|
|
323
422
|
class GraphBuilder(_actors.Actor):
|
324
423
|
|
325
424
|
"""
|
326
|
-
GraphBuilder is a worker (
|
327
|
-
The logic for graph building is provided in graph_builder.py
|
425
|
+
GraphBuilder is a worker (actor) to wrap the GraphBuilder logic from graph_builder.py
|
328
426
|
"""
|
329
427
|
|
330
|
-
def __init__(
|
331
|
-
self, job_config: _cfg.JobConfig,
|
332
|
-
result_spec: _graph.JobResultSpec,
|
333
|
-
resolver: _func.FunctionResolver):
|
334
|
-
|
428
|
+
def __init__(self, job_config: _cfg.JobConfig, result_spec: _graph.JobResultSpec):
|
335
429
|
super().__init__()
|
336
430
|
self.job_config = job_config
|
337
431
|
self.result_spec = result_spec
|
338
|
-
self.graph: tp.Optional[_EngineContext] = None
|
339
|
-
|
340
|
-
self._resolver = resolver
|
341
432
|
self._log = _util.logger_for_object(self)
|
342
433
|
|
343
434
|
def on_start(self):
|
435
|
+
self.build_graph(self, self.job_config)
|
436
|
+
|
437
|
+
@_actors.Message
|
438
|
+
def build_graph(self, job_config: _cfg.JobConfig):
|
344
439
|
|
345
440
|
self._log.info("Building execution graph")
|
346
441
|
|
347
442
|
# TODO: Get sys config, or find a way to pass storage settings
|
348
|
-
|
349
|
-
|
350
|
-
graph = _EngineContext(graph_nodes, pending_nodes=set(graph_nodes.keys()))
|
443
|
+
graph_builder = _graph.GraphBuilder(job_config, self.result_spec)
|
444
|
+
graph_spec = graph_builder.build_job(job_config.job)
|
351
445
|
|
352
|
-
self.
|
446
|
+
self.actors().reply("build_graph_succeeded", graph_spec)
|
353
447
|
|
354
|
-
for node_id, node in graph.nodes.items():
|
355
|
-
node.function = self._resolver.resolve_node(node.node)
|
356
448
|
|
449
|
+
class FunctionResolver(_actors.Actor):
|
450
|
+
|
451
|
+
"""
|
452
|
+
GraphResolver is a worker (actors) to wrap the FunctionResolver logic in functions.py
|
453
|
+
"""
|
454
|
+
|
455
|
+
def __init__(self, resolver: _func.FunctionResolver, graph: _EngineContext):
|
456
|
+
super().__init__()
|
357
457
|
self.graph = graph
|
358
|
-
self.
|
458
|
+
self._resolver = resolver
|
459
|
+
self._log = _util.logger_for_object(self)
|
460
|
+
|
461
|
+
def on_start(self):
|
462
|
+
self.resolve_functions(self, self.graph)
|
359
463
|
|
360
464
|
@_actors.Message
|
361
|
-
def
|
465
|
+
def resolve_functions(self, graph: _EngineContext):
|
362
466
|
|
363
|
-
self.
|
467
|
+
self._log.info("Resolving graph nodes to executable code")
|
468
|
+
|
469
|
+
for node_id, node in graph.nodes.items():
|
470
|
+
node.function = self._resolver.resolve_node(node.node)
|
471
|
+
|
472
|
+
self.actors().reply("resolve_functions_succeeded", graph)
|
364
473
|
|
365
474
|
|
366
475
|
class GraphProcessor(_actors.Actor):
|
@@ -376,10 +485,10 @@ class GraphProcessor(_actors.Actor):
|
|
376
485
|
Once all running nodes are stopped, an error is reported to the parent
|
377
486
|
"""
|
378
487
|
|
379
|
-
def __init__(self, graph: _EngineContext,
|
488
|
+
def __init__(self, graph: _EngineContext, resolver: _func.FunctionResolver):
|
380
489
|
super().__init__()
|
381
490
|
self.graph = graph
|
382
|
-
self.
|
491
|
+
self.root_id_ = graph.root_id
|
383
492
|
self.processors: tp.Dict[NodeId, _actors.ActorId] = dict()
|
384
493
|
self._resolver = resolver
|
385
494
|
self._log = _util.logger_for_object(self)
|
@@ -427,12 +536,14 @@ class GraphProcessor(_actors.Actor):
|
|
427
536
|
# Model and data nodes map to different thread pools in the actors engine
|
428
537
|
# There is scope for a much more sophisticated approach, with prioritized scheduling
|
429
538
|
|
430
|
-
if isinstance(node.node, _graph.
|
431
|
-
processor =
|
539
|
+
if isinstance(node.node, _graph.ChildJobNode):
|
540
|
+
processor = ChildJobNodeProcessor(processed_graph, node)
|
541
|
+
elif isinstance(node.node, _graph.RunModelNode) or isinstance(node.node, _graph.ImportModelNode):
|
542
|
+
processor = ModelNodeProcessor(processed_graph, node)
|
432
543
|
elif isinstance(node.node, _graph.LoadDataNode) or isinstance(node.node, _graph.SaveDataNode):
|
433
|
-
processor = DataNodeProcessor(processed_graph,
|
544
|
+
processor = DataNodeProcessor(processed_graph, node)
|
434
545
|
else:
|
435
|
-
processor = NodeProcessor(processed_graph,
|
546
|
+
processor = NodeProcessor(processed_graph, node)
|
436
547
|
|
437
548
|
# New nodes can be launched with the updated graph
|
438
549
|
# Anything that was pruned is not needed by the new node
|
@@ -502,7 +613,7 @@ class GraphProcessor(_actors.Actor):
|
|
502
613
|
for node_id, node in new_nodes.items():
|
503
614
|
GraphLogger.log_node_add(node)
|
504
615
|
node_func = self._resolver.resolve_node(node)
|
505
|
-
new_node = _EngineNode(node,
|
616
|
+
new_node = _EngineNode(node, node_func)
|
506
617
|
new_graph.nodes[node_id] = new_node
|
507
618
|
new_graph.pending_nodes.add(node_id)
|
508
619
|
|
@@ -625,9 +736,10 @@ class GraphProcessor(_actors.Actor):
|
|
625
736
|
for node_id in list(filter(lambda n: n.namespace == context_pop, nodes)):
|
626
737
|
nodes.pop(node_id)
|
627
738
|
|
628
|
-
graph =
|
739
|
+
self.graph = self.graph.with_updates(
|
740
|
+
nodes, pending_nodes, active_nodes,
|
741
|
+
succeeded_nodes, failed_nodes)
|
629
742
|
|
630
|
-
self.graph = graph
|
631
743
|
self.check_job_status()
|
632
744
|
|
633
745
|
def check_job_status(self, do_submit=True):
|
@@ -657,7 +769,7 @@ class GraphProcessor(_actors.Actor):
|
|
657
769
|
self.actors().send_parent("job_failed", _ex.EModelExec("Job suffered multiple errors", errors))
|
658
770
|
|
659
771
|
else:
|
660
|
-
job_result = self.graph.nodes[self.root_id].result
|
772
|
+
job_result = self.graph.nodes[self.graph.root_id].result
|
661
773
|
self.actors().send_parent("job_succeeded", job_result)
|
662
774
|
|
663
775
|
|
@@ -669,11 +781,12 @@ class NodeProcessor(_actors.Actor):
|
|
669
781
|
|
670
782
|
__NONE_TYPE = type(None)
|
671
783
|
|
672
|
-
def __init__(self, graph: _EngineContext,
|
784
|
+
def __init__(self, graph: _EngineContext, node: _EngineNode):
|
673
785
|
super().__init__()
|
674
786
|
self.graph = graph
|
675
|
-
self.node_id = node_id
|
676
787
|
self.node = node
|
788
|
+
self.node_id = node.node.id
|
789
|
+
|
677
790
|
|
678
791
|
def on_start(self):
|
679
792
|
|
@@ -782,14 +895,59 @@ class NodeProcessor(_actors.Actor):
|
|
782
895
|
|
783
896
|
class ModelNodeProcessor(NodeProcessor):
|
784
897
|
|
785
|
-
def __init__(self, graph: _EngineContext,
|
786
|
-
super().__init__(graph,
|
898
|
+
def __init__(self, graph: _EngineContext, node: _EngineNode):
|
899
|
+
super().__init__(graph, node)
|
787
900
|
|
788
901
|
|
789
902
|
class DataNodeProcessor(NodeProcessor):
|
790
903
|
|
791
|
-
def __init__(self, graph: _EngineContext,
|
792
|
-
super().__init__(graph,
|
904
|
+
def __init__(self, graph: _EngineContext, node: _EngineNode):
|
905
|
+
super().__init__(graph, node)
|
906
|
+
|
907
|
+
|
908
|
+
class ChildJobNodeProcessor(NodeProcessor):
|
909
|
+
|
910
|
+
def __init__(self, graph: _EngineContext, node: _EngineNode):
|
911
|
+
super().__init__(graph, node)
|
912
|
+
|
913
|
+
@_actors.Message
|
914
|
+
def evaluate_node(self):
|
915
|
+
|
916
|
+
NodeLogger.log_node_start(self.node)
|
917
|
+
|
918
|
+
job_id = self.node.node.job_id # noqa
|
919
|
+
job_key = _util.object_key(job_id)
|
920
|
+
|
921
|
+
node_id = self.actors().id
|
922
|
+
|
923
|
+
def success_callback(ctx, _, result):
|
924
|
+
ctx.send(node_id, "child_job_succeeded", result)
|
925
|
+
|
926
|
+
def failure_callback(ctx, _, error):
|
927
|
+
ctx.send(node_id, "child_job_failed", error)
|
928
|
+
|
929
|
+
monitor = JobMonitor(job_key, success_callback, failure_callback)
|
930
|
+
monitor_id = self.actors().spawn(monitor)
|
931
|
+
|
932
|
+
graph_spec: _graph.Graph = self.node.node.graph # noqa
|
933
|
+
|
934
|
+
self.actors().send(self.graph.engine_id, "submit_child_job", job_id, graph_spec, monitor_id)
|
935
|
+
|
936
|
+
@_actors.Message
|
937
|
+
def child_job_succeeded(self, job_result: _cfg.JobResult):
|
938
|
+
|
939
|
+
self._check_result_type(job_result)
|
940
|
+
|
941
|
+
NodeLogger.log_node_succeeded(self.node)
|
942
|
+
|
943
|
+
self.actors().send_parent("node_succeeded", self.node_id, job_result)
|
944
|
+
|
945
|
+
@_actors.Message
|
946
|
+
def child_job_failed(self, job_error: Exception):
|
947
|
+
|
948
|
+
NodeLogger.log_node_failed(self.node, job_error)
|
949
|
+
|
950
|
+
self.actors().send_parent("node_failed", self.node_id, job_error)
|
793
951
|
|
794
952
|
|
795
953
|
class GraphLogger:
|
tracdap/rt/_exec/functions.py
CHANGED
@@ -623,6 +623,17 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
|
|
623
623
|
storage_impl = self.storage_manager.get_file_storage(storage_key, external=True)
|
624
624
|
storage = _ctx.TracFileStorageImpl(storage_key, storage_impl, write_access, self.checkout_directory)
|
625
625
|
storage_map[storage_key] = storage
|
626
|
+
elif self.storage_manager.has_data_storage(storage_key, external=True):
|
627
|
+
storage_impl = self.storage_manager.get_data_storage(storage_key, external=True)
|
628
|
+
# This is a work-around until the storage extension API can be updated / unified
|
629
|
+
if not isinstance(storage_impl, _storage.IDataStorageBase):
|
630
|
+
raise _ex.EStorageConfig(f"External storage for [{storage_key}] is using the legacy storage framework]")
|
631
|
+
converter = _data.DataConverter.noop()
|
632
|
+
storage = _ctx.TracDataStorageImpl(storage_key, storage_impl, converter, write_access, self.checkout_directory)
|
633
|
+
storage_map[storage_key] = storage
|
634
|
+
else:
|
635
|
+
raise _ex.EStorageConfig(f"External storage is not available: [{storage_key}]")
|
636
|
+
|
626
637
|
|
627
638
|
# Run the model against the mapped local context
|
628
639
|
|
@@ -688,7 +699,7 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
|
|
688
699
|
output_section = _graph.GraphBuilder.build_runtime_outputs(dynamic_outputs, self.node.id.namespace)
|
689
700
|
new_nodes.update(output_section.nodes)
|
690
701
|
|
691
|
-
ctx_id = NodeId.of("
|
702
|
+
ctx_id = NodeId.of("trac_job_result", self.node.id.namespace, result_type=None)
|
692
703
|
new_deps[ctx_id] = list(_graph.Dependency(nid, _graph.DependencyType.HARD) for nid in output_section.outputs)
|
693
704
|
|
694
705
|
self.node_callback.send_graph_updates(new_nodes, new_deps)
|
@@ -696,6 +707,18 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
|
|
696
707
|
return results
|
697
708
|
|
698
709
|
|
710
|
+
class ChildJobFunction(NodeFunction[None]):
|
711
|
+
|
712
|
+
def __init__(self, node: ChildJobNode):
|
713
|
+
super().__init__()
|
714
|
+
self.node = node
|
715
|
+
|
716
|
+
def _execute(self, ctx: NodeContext):
|
717
|
+
# This node should never execute, the engine intercepts child job nodes and provides special handling
|
718
|
+
raise _ex.ETracInternal("Child job was not processed correctly (this is a bug)")
|
719
|
+
|
720
|
+
|
721
|
+
|
699
722
|
# ----------------------------------------------------------------------------------------------------------------------
|
700
723
|
# FUNCTION RESOLUTION
|
701
724
|
# ----------------------------------------------------------------------------------------------------------------------
|
@@ -779,6 +802,7 @@ class FunctionResolver:
|
|
779
802
|
DataResultNode: DataResultFunc,
|
780
803
|
StaticValueNode: StaticValueFunc,
|
781
804
|
RuntimeOutputsNode: RuntimeOutputsFunc,
|
805
|
+
ChildJobNode: ChildJobFunction,
|
782
806
|
BundleItemNode: NoopFunc,
|
783
807
|
NoopNode: NoopFunc,
|
784
808
|
RunModelResultNode: NoopFunc
|
tracdap/rt/_exec/graph.py
CHANGED
@@ -414,3 +414,12 @@ class SaveJobResultNode(Node[None]):
|
|
414
414
|
|
415
415
|
def _node_dependencies(self) -> tp.Dict[NodeId, DependencyType]:
|
416
416
|
return {self.job_result_id: DependencyType.HARD}
|
417
|
+
|
418
|
+
|
419
|
+
@_node_type
|
420
|
+
class ChildJobNode(Node[cfg.JobResult]):
|
421
|
+
|
422
|
+
job_id: meta.TagHeader
|
423
|
+
job_def: meta.JobDefinition
|
424
|
+
|
425
|
+
graph: Graph
|