tracdap-runtime 0.6.5__py3-none-any.whl → 0.7.0rc1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- tracdap/rt/__init__.py +6 -5
- tracdap/rt/_exec/actors.py +6 -5
- tracdap/rt/_exec/context.py +278 -110
- tracdap/rt/_exec/dev_mode.py +237 -143
- tracdap/rt/_exec/engine.py +223 -64
- tracdap/rt/_exec/functions.py +31 -6
- tracdap/rt/_exec/graph.py +15 -5
- tracdap/rt/_exec/graph_builder.py +301 -203
- tracdap/rt/_exec/runtime.py +13 -10
- tracdap/rt/_exec/server.py +6 -5
- tracdap/rt/_impl/__init__.py +6 -5
- tracdap/rt/_impl/config_parser.py +17 -9
- tracdap/rt/_impl/data.py +284 -172
- tracdap/rt/_impl/ext/__init__.py +14 -0
- tracdap/rt/_impl/ext/sql.py +117 -0
- tracdap/rt/_impl/ext/storage.py +58 -0
- tracdap/rt/_impl/grpc/__init__.py +6 -5
- tracdap/rt/_impl/grpc/codec.py +6 -5
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +62 -54
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +37 -2
- tracdap/rt/_impl/guard_rails.py +6 -5
- tracdap/rt/_impl/models.py +6 -5
- tracdap/rt/_impl/repos.py +6 -5
- tracdap/rt/_impl/schemas.py +6 -5
- tracdap/rt/_impl/shim.py +6 -5
- tracdap/rt/_impl/static_api.py +30 -16
- tracdap/rt/_impl/storage.py +8 -7
- tracdap/rt/_impl/type_system.py +6 -5
- tracdap/rt/_impl/util.py +16 -5
- tracdap/rt/_impl/validation.py +72 -18
- tracdap/rt/_plugins/__init__.py +6 -5
- tracdap/rt/_plugins/_helpers.py +6 -5
- tracdap/rt/_plugins/config_local.py +6 -5
- tracdap/rt/_plugins/format_arrow.py +6 -5
- tracdap/rt/_plugins/format_csv.py +6 -5
- tracdap/rt/_plugins/format_parquet.py +6 -5
- tracdap/rt/_plugins/repo_git.py +6 -5
- tracdap/rt/_plugins/repo_local.py +6 -5
- tracdap/rt/_plugins/repo_pypi.py +6 -5
- tracdap/rt/_plugins/storage_aws.py +6 -5
- tracdap/rt/_plugins/storage_azure.py +6 -5
- tracdap/rt/_plugins/storage_gcp.py +6 -5
- tracdap/rt/_plugins/storage_local.py +6 -5
- tracdap/rt/_plugins/storage_sql.py +418 -0
- tracdap/rt/_plugins/storage_sql_dialects.py +118 -0
- tracdap/rt/_version.py +7 -6
- tracdap/rt/api/__init__.py +23 -5
- tracdap/rt/api/experimental.py +85 -37
- tracdap/rt/api/hook.py +16 -5
- tracdap/rt/api/model_api.py +110 -90
- tracdap/rt/api/static_api.py +142 -100
- tracdap/rt/config/common.py +26 -27
- tracdap/rt/config/job.py +5 -6
- tracdap/rt/config/platform.py +41 -42
- tracdap/rt/config/result.py +5 -6
- tracdap/rt/config/runtime.py +6 -7
- tracdap/rt/exceptions.py +13 -7
- tracdap/rt/ext/__init__.py +6 -5
- tracdap/rt/ext/config.py +6 -5
- tracdap/rt/ext/embed.py +6 -5
- tracdap/rt/ext/plugins.py +6 -5
- tracdap/rt/ext/repos.py +6 -5
- tracdap/rt/ext/storage.py +6 -5
- tracdap/rt/launch/__init__.py +10 -5
- tracdap/rt/launch/__main__.py +6 -5
- tracdap/rt/launch/cli.py +6 -5
- tracdap/rt/launch/launch.py +38 -15
- tracdap/rt/metadata/__init__.py +4 -0
- tracdap/rt/metadata/common.py +2 -3
- tracdap/rt/metadata/custom.py +3 -4
- tracdap/rt/metadata/data.py +30 -31
- tracdap/rt/metadata/file.py +6 -7
- tracdap/rt/metadata/flow.py +22 -23
- tracdap/rt/metadata/job.py +89 -45
- tracdap/rt/metadata/model.py +26 -27
- tracdap/rt/metadata/object.py +11 -12
- tracdap/rt/metadata/object_id.py +23 -24
- tracdap/rt/metadata/resource.py +0 -1
- tracdap/rt/metadata/search.py +15 -16
- tracdap/rt/metadata/stoarge.py +22 -23
- tracdap/rt/metadata/tag.py +8 -9
- tracdap/rt/metadata/tag_update.py +11 -12
- tracdap/rt/metadata/type.py +38 -38
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0rc1.dist-info}/LICENSE +1 -1
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0rc1.dist-info}/METADATA +4 -2
- tracdap_runtime-0.7.0rc1.dist-info/RECORD +121 -0
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0rc1.dist-info}/WHEEL +1 -1
- tracdap_runtime-0.6.5.dist-info/RECORD +0 -116
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0rc1.dist-info}/top_level.txt +0 -0
tracdap/rt/_exec/engine.py
CHANGED
@@ -1,8 +1,9 @@
-#
-#
-#
-#
-#
+# Licensed to the Fintech Open Source Foundation (FINOS) under one or
+# more contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright ownership.
+# FINOS licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with the
+# License. You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
@@ -39,8 +40,9 @@ class _EngineNode:
     """

     node: _graph.Node
-    dependencies: tp.Dict[NodeId, _graph.DependencyType]
     function: tp.Optional[_func.NodeFunction] = None
+
+    dependencies: tp.Dict[NodeId, _graph.DependencyType] = dc.field(default_factory=dict)
     complete: bool = False
     result: tp.Optional[tp.Any] = None
     error: tp.Optional[str] = None
@@ -57,21 +59,35 @@ class _EngineContext:
     Represents the state of an execution graph being processed by the TRAC engine
     """

+    engine_id: _actors.ActorId
+    job_key: str
+    root_id: NodeId
+
     nodes: tp.Dict[NodeId, _EngineNode]
     pending_nodes: tp.Set[NodeId] = dc.field(default_factory=set)
     active_nodes: tp.Set[NodeId] = dc.field(default_factory=set)
     succeeded_nodes: tp.Set[NodeId] = dc.field(default_factory=set)
     failed_nodes: tp.Set[NodeId] = dc.field(default_factory=set)

+    def with_updates(
+            self, nodes,
+            pending_nodes, active_nodes,
+            succeeded_nodes, failed_nodes) -> "_EngineContext":
+
+        return _EngineContext(
+            self.engine_id, self.job_key, self.root_id, nodes,
+            pending_nodes, active_nodes, succeeded_nodes, failed_nodes)
+

 @dc.dataclass
 class _JobState:

     job_id: _meta.TagHeader
-    job_config: _cfg.JobConfig
-
     actor_id: _actors.ActorId = None

+    monitors: tp.List[_actors.ActorId] = dc.field(default_factory=list)
+
+    job_config: _cfg.JobConfig = None
     job_result: _cfg.JobResult = None
     job_error: Exception = None

@@ -154,14 +170,35 @@ class TracEngine(_actors.Actor):

         self._log.info(f"Job submitted: [{job_key}]")

-        job_processor = JobProcessor(job_key, job_config, result_spec,
+        job_processor = JobProcessor(self._models, self._storage, job_key, job_config, result_spec, graph_spec=None)
         job_actor_id = self.actors().spawn(job_processor)

-
+        job_monitor_success = lambda ctx, key, result: self._notify_callback(key, result, None)
+        job_monitor_failure = lambda ctx, key, error: self._notify_callback(key, None, error)
+        job_monitor = JobMonitor(job_key, job_monitor_success, job_monitor_failure)
+        job_monitor_id = self.actors().spawn(job_monitor)
+
+        job_state = _JobState(job_config.jobId)
         job_state.actor_id = job_actor_id
+        job_state.monitors.append(job_monitor_id)
+        job_state.job_config = job_config

         self._jobs[job_key] = job_state

+    @_actors.Message
+    def submit_child_job(self, child_id: _meta.TagHeader, child_graph: _graph.Graph, monitor_id: _actors.ActorId):
+
+        child_key = _util.object_key(child_id)
+
+        child_processor = JobProcessor(self._models, self._storage, child_key, None, None, graph_spec=child_graph)  # noqa
+        child_actor_id = self.actors().spawn(child_processor)
+
+        child_state = _JobState(child_id)
+        child_state.actor_id = child_actor_id
+        child_state.monitors.append(monitor_id)
+
+        self._jobs[child_key] = child_state
+
     @_actors.Message
     def get_job_list(self):

@@ -184,11 +221,13 @@ class TracEngine(_actors.Actor):

         self._log.info(f"Recording job as successful: {job_key}")

-        self._jobs[job_key]
-
+        job_state = self._jobs[job_key]
+        job_state.job_result = job_result
+
+        for monitor_id in job_state.monitors:
+            self.actors().send(monitor_id, "job_succeeded", job_result)

-
-        self._notify_callback(job_key, job_result, None)
+        self._finalize_job(job_key)

     @_actors.Message
     def job_failed(self, job_key: str, error: Exception):
@@ -200,11 +239,13 @@ class TracEngine(_actors.Actor):

         self._log.error(f"Recording job as failed: {job_key}")

-        self._jobs[job_key]
-
+        job_state = self._jobs[job_key]
+        job_state.job_error = error
+
+        for monitor_id in job_state.monitors:
+            self.actors().send(monitor_id, "job_failed", error)

-
-        self._notify_callback(job_key, None, error)
+        self._finalize_job(job_key)

     def _finalize_job(self, job_key: str):

@@ -214,10 +255,17 @@ class TracEngine(_actors.Actor):
         # For now each instance of the runtime only processes one job so no need to worry

         job_state = self._jobs.get(job_key)
-        job_actor_id = job_state.actor_id if job_state is not None else None

-
-
+        # Stop any monitors that were created directly by the engine
+        # (Other actors are responsible for stopping their own monitors)
+        while job_state.monitors:
+            monitor_id = job_state.monitors.pop()
+            monitor_parent = monitor_id[:monitor_id.rfind('/')]
+            if self.actors().id == monitor_parent:
+                self.actors().stop(monitor_id)
+
+        if job_state.actor_id is not None:
+            self.actors().stop(job_state.actor_id)
         job_state.actor_id = None

     def _get_job_info(self, job_key: str, details: bool = False) -> tp.Optional[_cfg.JobResult]:
@@ -251,6 +299,35 @@ class TracEngine(_actors.Actor):
         return job_result


+class JobMonitor(_actors.Actor):
+
+    def __init__(
+            self, job_key: str,
+            success_func: tp.Callable[[_actors.ActorContext, str, _cfg.JobResult], None],
+            failure_func: tp.Callable[[_actors.ActorContext, str, Exception], None]):
+
+        super().__init__()
+        self._job_key = job_key
+        self._success_func = success_func
+        self._failure_func = failure_func
+        self._signal_sent = False
+
+    @_actors.Message
+    def job_succeeded(self, job_result: _cfg.JobResult):
+        self._success_func(self.actors(), self._job_key, job_result)
+        self._signal_sent = True
+
+    @_actors.Message
+    def job_failed(self, error: Exception):
+        self._failure_func(self.actors(), self._job_key, error)
+        self._signal_sent = True
+
+    def on_stop(self):
+        if not self._signal_sent:
+            error = _ex.ETracInternal(f"No result was received for job [{self._job_key}]")
+            self._failure_func(self.actors(), self._job_key, error)
+
+
 class JobProcessor(_actors.Actor):

     """
@@ -259,26 +336,32 @@ class JobProcessor(_actors.Actor):
     """

     def __init__(
-            self,
-            result_spec: _graph.JobResultSpec,
-
-            storage: _storage.StorageManager):
+            self, models: _models.ModelLoader, storage: _storage.StorageManager,
+            job_key: str, job_config: _cfg.JobConfig, result_spec: _graph.JobResultSpec,
+            graph_spec: tp.Optional[_graph.Graph]):

         super().__init__()
         self.job_key = job_key
         self.job_config = job_config
         self.result_spec = result_spec
+        self.graph_spec = graph_spec
         self._models = models
         self._storage = storage
         self._resolver = _func.FunctionResolver(models, storage)
         self._log = _util.logger_for_object(self)

     def on_start(self):
+
         self._log.info(f"Starting job [{self.job_key}]")
         self._models.create_scope(self.job_key)
-
+
+        if self.graph_spec is not None:
+            self.actors().send(self.actors().id, "build_graph_succeeded", self.graph_spec)
+        else:
+            self.actors().spawn(GraphBuilder(self.job_config, self.result_spec))

     def on_stop(self):
+
         self._log.info(f"Cleaning up job [{self.job_key}]")
         self._models.destroy_scope(self.job_key)

@@ -303,9 +386,26 @@ class JobProcessor(_actors.Actor):
         return super().on_signal(signal)

     @_actors.Message
-    def
-
-
+    def build_graph_succeeded(self, graph_spec: _graph.Graph):
+
+        # Build a new engine context graph from the graph spec
+        engine_id = self.actors().parent
+        nodes = dict((node_id, _EngineNode(node)) for node_id, node in graph_spec.nodes.items())
+        graph = _EngineContext(engine_id, self.job_key, graph_spec.root_id, nodes)
+
+        # Add all the nodes as pending nodes to start
+        graph.pending_nodes.update(graph.nodes.keys())
+
+        self.actors().spawn(FunctionResolver(self._resolver, graph))
+        if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
+            self.actors().stop(self.actors().sender)
+
+    @_actors.Message
+    def resolve_functions_succeeded(self, graph: _EngineContext):
+
+        self.actors().spawn(GraphProcessor(graph, self._resolver))
+        if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
+            self.actors().stop(self.actors().sender)

     @_actors.Message
     def job_succeeded(self, job_result: _cfg.JobResult):
@@ -323,44 +423,54 @@ class JobProcessor(_actors.Actor):
 class GraphBuilder(_actors.Actor):

     """
-    GraphBuilder is a worker (
-    The logic for graph building is provided in graph_builder.py
+    GraphBuilder is a worker (actor) to wrap the GraphBuilder logic from graph_builder.py
     """

-    def __init__(
-            self, job_config: _cfg.JobConfig,
-            result_spec: _graph.JobResultSpec,
-            resolver: _func.FunctionResolver):
-
+    def __init__(self, job_config: _cfg.JobConfig, result_spec: _graph.JobResultSpec):
         super().__init__()
         self.job_config = job_config
         self.result_spec = result_spec
-        self.graph: tp.Optional[_EngineContext] = None
-
-        self._resolver = resolver
         self._log = _util.logger_for_object(self)

     def on_start(self):
+        self.build_graph(self, self.job_config)
+
+    @_actors.Message
+    def build_graph(self, job_config: _cfg.JobConfig):

         self._log.info("Building execution graph")

         # TODO: Get sys config, or find a way to pass storage settings
-
-
-        graph = _EngineContext(graph_nodes, pending_nodes=set(graph_nodes.keys()))
+        graph_builder = _graph.GraphBuilder(job_config, self.result_spec)
+        graph_spec = graph_builder.build_job(job_config.job)

-        self.
+        self.actors().reply("build_graph_succeeded", graph_spec)

-        for node_id, node in graph.nodes.items():
-            node.function = self._resolver.resolve_node(node.node)

+class FunctionResolver(_actors.Actor):
+
+    """
+    GraphResolver is a worker (actors) to wrap the FunctionResolver logic in functions.py
+    """
+
+    def __init__(self, resolver: _func.FunctionResolver, graph: _EngineContext):
+        super().__init__()
         self.graph = graph
-        self.
+        self._resolver = resolver
+        self._log = _util.logger_for_object(self)
+
+    def on_start(self):
+        self.resolve_functions(self, self.graph)

     @_actors.Message
-    def
+    def resolve_functions(self, graph: _EngineContext):

-        self.
+        self._log.info("Resolving graph nodes to executable code")
+
+        for node_id, node in graph.nodes.items():
+            node.function = self._resolver.resolve_node(node.node)
+
+        self.actors().reply("resolve_functions_succeeded", graph)


 class GraphProcessor(_actors.Actor):
@@ -376,10 +486,10 @@ class GraphProcessor(_actors.Actor):
     Once all running nodes are stopped, an error is reported to the parent
     """

-    def __init__(self, graph: _EngineContext,
+    def __init__(self, graph: _EngineContext, resolver: _func.FunctionResolver):
         super().__init__()
         self.graph = graph
-        self.
+        self.root_id_ = graph.root_id
         self.processors: tp.Dict[NodeId, _actors.ActorId] = dict()
         self._resolver = resolver
         self._log = _util.logger_for_object(self)
@@ -427,12 +537,14 @@ class GraphProcessor(_actors.Actor):
         # Model and data nodes map to different thread pools in the actors engine
         # There is scope for a much more sophisticated approach, with prioritized scheduling

-        if isinstance(node.node, _graph.
-            processor =
+        if isinstance(node.node, _graph.ChildJobNode):
+            processor = ChildJobNodeProcessor(processed_graph, node)
+        elif isinstance(node.node, _graph.RunModelNode) or isinstance(node.node, _graph.ImportModelNode):
+            processor = ModelNodeProcessor(processed_graph, node)
         elif isinstance(node.node, _graph.LoadDataNode) or isinstance(node.node, _graph.SaveDataNode):
-            processor = DataNodeProcessor(processed_graph,
+            processor = DataNodeProcessor(processed_graph, node)
         else:
-            processor = NodeProcessor(processed_graph,
+            processor = NodeProcessor(processed_graph, node)

         # New nodes can be launched with the updated graph
         # Anything that was pruned is not needed by the new node
@@ -502,7 +614,7 @@ class GraphProcessor(_actors.Actor):
         for node_id, node in new_nodes.items():
             GraphLogger.log_node_add(node)
             node_func = self._resolver.resolve_node(node)
-            new_node = _EngineNode(node,
+            new_node = _EngineNode(node, node_func)
             new_graph.nodes[node_id] = new_node
             new_graph.pending_nodes.add(node_id)

@@ -625,9 +737,10 @@ class GraphProcessor(_actors.Actor):
         for node_id in list(filter(lambda n: n.namespace == context_pop, nodes)):
             nodes.pop(node_id)

-        graph =
+        self.graph = self.graph.with_updates(
+            nodes, pending_nodes, active_nodes,
+            succeeded_nodes, failed_nodes)

-        self.graph = graph
         self.check_job_status()

     def check_job_status(self, do_submit=True):
@@ -657,7 +770,7 @@ class GraphProcessor(_actors.Actor):
             self.actors().send_parent("job_failed", _ex.EModelExec("Job suffered multiple errors", errors))

         else:
-            job_result = self.graph.nodes[self.root_id].result
+            job_result = self.graph.nodes[self.graph.root_id].result
             self.actors().send_parent("job_succeeded", job_result)


@@ -669,11 +782,12 @@ class NodeProcessor(_actors.Actor):

     __NONE_TYPE = type(None)

-    def __init__(self, graph: _EngineContext,
+    def __init__(self, graph: _EngineContext, node: _EngineNode):
         super().__init__()
         self.graph = graph
-        self.node_id = node_id
         self.node = node
+        self.node_id = node.node.id
+

     def on_start(self):

@@ -782,14 +896,59 @@ class NodeProcessor(_actors.Actor):

 class ModelNodeProcessor(NodeProcessor):

-    def __init__(self, graph: _EngineContext,
-        super().__init__(graph,
+    def __init__(self, graph: _EngineContext, node: _EngineNode):
+        super().__init__(graph, node)


 class DataNodeProcessor(NodeProcessor):

-    def __init__(self, graph: _EngineContext,
-        super().__init__(graph,
+    def __init__(self, graph: _EngineContext, node: _EngineNode):
+        super().__init__(graph, node)
+
+
+class ChildJobNodeProcessor(NodeProcessor):
+
+    def __init__(self, graph: _EngineContext, node: _EngineNode):
+        super().__init__(graph, node)
+
+    @_actors.Message
+    def evaluate_node(self):
+
+        NodeLogger.log_node_start(self.node)
+
+        job_id = self.node.node.job_id  # noqa
+        job_key = _util.object_key(job_id)
+
+        node_id = self.actors().id
+
+        def success_callback(ctx, _, result):
+            ctx.send(node_id, "child_job_succeeded", result)
+
+        def failure_callback(ctx, _, error):
+            ctx.send(node_id, "child_job_failed", error)
+
+        monitor = JobMonitor(job_key, success_callback, failure_callback)
+        monitor_id = self.actors().spawn(monitor)
+
+        graph_spec: _graph.Graph = self.node.node.graph  # noqa
+
+        self.actors().send(self.graph.engine_id, "submit_child_job", job_id, graph_spec, monitor_id)
+
+    @_actors.Message
+    def child_job_succeeded(self, job_result: _cfg.JobResult):
+
+        self._check_result_type(job_result)
+
+        NodeLogger.log_node_succeeded(self.node)
+
+        self.actors().send_parent("node_succeeded", self.node_id, job_result)
+
+    @_actors.Message
+    def child_job_failed(self, job_error: Exception):
+
+        NodeLogger.log_node_failed(self.node, job_error)
+
+        self.actors().send_parent("node_failed", self.node_id, job_error)


 class GraphLogger:
tracdap/rt/_exec/functions.py
CHANGED
@@ -1,8 +1,9 @@
-#
-#
-#
-#
-#
+# Licensed to the Fintech Open Source Foundation (FINOS) under one or
+# more contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright ownership.
+# FINOS licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with the
+# License. You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
@@ -623,6 +624,17 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
                 storage_impl = self.storage_manager.get_file_storage(storage_key, external=True)
                 storage = _ctx.TracFileStorageImpl(storage_key, storage_impl, write_access, self.checkout_directory)
                 storage_map[storage_key] = storage
+            elif self.storage_manager.has_data_storage(storage_key, external=True):
+                storage_impl = self.storage_manager.get_data_storage(storage_key, external=True)
+                # This is a work-around until the storage extension API can be updated / unified
+                if not isinstance(storage_impl, _storage.IDataStorageBase):
+                    raise _ex.EStorageConfig(f"External storage for [{storage_key}] is using the legacy storage framework]")
+                converter = _data.DataConverter.noop()
+                storage = _ctx.TracDataStorageImpl(storage_key, storage_impl, converter, write_access, self.checkout_directory)
+                storage_map[storage_key] = storage
+            else:
+                raise _ex.EStorageConfig(f"External storage is not available: [{storage_key}]")
+

         # Run the model against the mapped local context

@@ -688,7 +700,7 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
             output_section = _graph.GraphBuilder.build_runtime_outputs(dynamic_outputs, self.node.id.namespace)
             new_nodes.update(output_section.nodes)

-            ctx_id = NodeId.of("
+            ctx_id = NodeId.of("trac_job_result", self.node.id.namespace, result_type=None)
             new_deps[ctx_id] = list(_graph.Dependency(nid, _graph.DependencyType.HARD) for nid in output_section.outputs)

             self.node_callback.send_graph_updates(new_nodes, new_deps)
@@ -696,6 +708,18 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
         return results


+class ChildJobFunction(NodeFunction[None]):
+
+    def __init__(self, node: ChildJobNode):
+        super().__init__()
+        self.node = node
+
+    def _execute(self, ctx: NodeContext):
+        # This node should never execute, the engine intercepts child job nodes and provides special handling
+        raise _ex.ETracInternal("Child job was not processed correctly (this is a bug)")
+
+
+
 # ----------------------------------------------------------------------------------------------------------------------
 # FUNCTION RESOLUTION
 # ----------------------------------------------------------------------------------------------------------------------
@@ -779,6 +803,7 @@ class FunctionResolver:
         DataResultNode: DataResultFunc,
         StaticValueNode: StaticValueFunc,
         RuntimeOutputsNode: RuntimeOutputsFunc,
+        ChildJobNode: ChildJobFunction,
         BundleItemNode: NoopFunc,
         NoopNode: NoopFunc,
         RunModelResultNode: NoopFunc
tracdap/rt/_exec/graph.py
CHANGED
@@ -1,8 +1,9 @@
-#
-#
-#
-#
-#
+# Licensed to the Fintech Open Source Foundation (FINOS) under one or
+# more contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright ownership.
+# FINOS licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with the
+# License. You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
@@ -414,3 +415,12 @@ class SaveJobResultNode(Node[None]):

     def _node_dependencies(self) -> tp.Dict[NodeId, DependencyType]:
         return {self.job_result_id: DependencyType.HARD}
+
+
+@_node_type
+class ChildJobNode(Node[cfg.JobResult]):
+
+    job_id: meta.TagHeader
+    job_def: meta.JobDefinition
+
+    graph: Graph