tracdap-runtime 0.8.0b1__py3-none-any.whl → 0.8.0b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_impl/core/__init__.py +14 -0
- tracdap/rt/_impl/{config_parser.py → core/config_parser.py} +14 -7
- tracdap/rt/_impl/{data.py → core/data.py} +3 -3
- tracdap/rt/_impl/core/logging.py +195 -0
- tracdap/rt/_impl/{models.py → core/models.py} +7 -6
- tracdap/rt/_impl/{repos.py → core/repos.py} +5 -3
- tracdap/rt/_impl/{schemas.py → core/schemas.py} +5 -5
- tracdap/rt/_impl/{shim.py → core/shim.py} +5 -4
- tracdap/rt/_impl/{storage.py → core/storage.py} +7 -6
- tracdap/rt/_impl/{util.py → core/util.py} +0 -110
- tracdap/rt/_impl/{validation.py → core/validation.py} +4 -3
- tracdap/rt/_impl/exec/__init__.py +14 -0
- tracdap/rt/{_exec → _impl/exec}/actors.py +12 -14
- tracdap/rt/{_exec → _impl/exec}/context.py +30 -14
- tracdap/rt/{_exec → _impl/exec}/dev_mode.py +8 -7
- tracdap/rt/{_exec → _impl/exec}/engine.py +227 -104
- tracdap/rt/{_exec → _impl/exec}/functions.py +36 -57
- tracdap/rt/{_exec → _impl/exec}/graph.py +2 -21
- tracdap/rt/{_exec → _impl/exec}/graph_builder.py +33 -19
- tracdap/rt/{_exec → _impl/grpc}/server.py +7 -6
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +64 -62
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +16 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +3 -3
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.pyi +2 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +4 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.pyi +4 -2
- tracdap/rt/{_exec → _impl}/runtime.py +13 -12
- tracdap/rt/_impl/static_api.py +4 -4
- tracdap/rt/_plugins/format_csv.py +1 -1
- tracdap/rt/_plugins/storage_sql.py +1 -1
- tracdap/rt/_version.py +1 -1
- tracdap/rt/config/__init__.py +1 -0
- tracdap/rt/config/platform.py +8 -0
- tracdap/rt/ext/embed.py +2 -2
- tracdap/rt/ext/plugins.py +3 -3
- tracdap/rt/launch/launch.py +3 -3
- tracdap/rt/metadata/__init__.py +1 -0
- tracdap/rt/metadata/job.py +16 -0
- tracdap/rt/metadata/object.py +2 -0
- tracdap/rt/metadata/object_id.py +2 -0
- {tracdap_runtime-0.8.0b1.dist-info → tracdap_runtime-0.8.0b3.dist-info}/METADATA +2 -2
- {tracdap_runtime-0.8.0b1.dist-info → tracdap_runtime-0.8.0b3.dist-info}/RECORD +47 -45
- {tracdap_runtime-0.8.0b1.dist-info → tracdap_runtime-0.8.0b3.dist-info}/WHEEL +1 -1
- tracdap/rt/_exec/__init__.py +0 -0
- /tracdap/rt/_impl/{guard_rails.py → core/guard_rails.py} +0 -0
- /tracdap/rt/_impl/{type_system.py → core/type_system.py} +0 -0
- {tracdap_runtime-0.8.0b1.dist-info → tracdap_runtime-0.8.0b3.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.8.0b1.dist-info → tracdap_runtime-0.8.0b3.dist-info}/top_level.txt +0 -0
@@ -16,18 +16,22 @@
|
|
16
16
|
import copy as cp
|
17
17
|
import dataclasses as dc
|
18
18
|
import enum
|
19
|
+
import io
|
20
|
+
import pathlib
|
19
21
|
import typing as tp
|
20
22
|
|
21
23
|
import tracdap.rt.metadata as _meta
|
22
24
|
import tracdap.rt.config as _cfg
|
23
25
|
import tracdap.rt.exceptions as _ex
|
24
|
-
import tracdap.rt.
|
25
|
-
import tracdap.rt.
|
26
|
-
import tracdap.rt.
|
27
|
-
import tracdap.rt._impl.
|
28
|
-
import tracdap.rt._impl.data as _data
|
29
|
-
import tracdap.rt._impl.
|
30
|
-
import tracdap.rt._impl.
|
26
|
+
import tracdap.rt._impl.exec.actors as _actors
|
27
|
+
import tracdap.rt._impl.exec.graph_builder as _graph
|
28
|
+
import tracdap.rt._impl.exec.functions as _func
|
29
|
+
import tracdap.rt._impl.core.config_parser as _cfg_p
|
30
|
+
import tracdap.rt._impl.core.data as _data
|
31
|
+
import tracdap.rt._impl.core.logging as _logging
|
32
|
+
import tracdap.rt._impl.core.models as _models
|
33
|
+
import tracdap.rt._impl.core.storage as _storage
|
34
|
+
import tracdap.rt._impl.core.util as _util
|
31
35
|
|
32
36
|
from .graph import NodeId
|
33
37
|
|
@@ -79,18 +83,44 @@ class _EngineContext:
|
|
79
83
|
pending_nodes, active_nodes, succeeded_nodes, failed_nodes)
|
80
84
|
|
81
85
|
|
86
|
+
@dc.dataclass
class _JobResultSpec:

    """
    Describes whether and where the result of a job should be saved.

    All fields have defaults, so a spec can be created with no arguments
    (nothing is saved) or with just the save_result flag.
    """

    # When False, no result file is written for the job
    save_result: bool = False

    # Directory the result file is written to (None when no result is saved)
    result_dir: tp.Optional[tp.Union[str, pathlib.Path]] = None

    # Format of the result file, also used as its file extension (None when no result is saved)
    result_format: tp.Optional[str] = None
|
92
|
+
|
93
|
+
|
82
94
|
@dc.dataclass
class _JobState:

    """
    State held by the engine for one job (either a top level job or a child job).

    The optional second constructor argument (log_init) is an existing log provider.
    When one is supplied it is reused for this job, otherwise a new in-memory
    log buffer is created together with a job log provider that writes to it.
    """

    job_id: _meta.TagHeader

    # Init-only argument, consumed by __post_init__ and not stored as a field
    log_init: dc.InitVar[tp.Optional[_logging.LogProvider]] = None

    actor_id: _actors.ActorId = None
    monitors: tp.List[_actors.ActorId] = dc.field(default_factory=list)

    job_config: _cfg.JobConfig = None
    job_result: _cfg.JobResult = None
    job_error: Exception = None

    # Key of the parent job, left as None for jobs that have no parent
    parent_key: str = None
    result_spec: _JobResultSpec = None

    # log_buffer is only created when no existing log provider is supplied
    log_buffer: io.BytesIO = None
    log_provider: _logging.LogProvider = None
    log: _logging.Logger = None

    def __post_init__(self, log_init):

        # Test the init-only argument, not self.log: self.log is the Logger field
        # (None by default), so checking it meant a supplied provider was never used
        if isinstance(log_init, _logging.LogProvider):
            self.log_provider = log_init
        else:
            self.log_buffer = io.BytesIO()
            self.log_provider = _logging.job_log_provider(self.log_buffer)

        self.log = self.log_provider.logger_for_class(TracEngine)
|
123
|
+
|
94
124
|
|
95
125
|
class TracEngine(_actors.Actor):
|
96
126
|
|
@@ -107,7 +137,7 @@ class TracEngine(_actors.Actor):
|
|
107
137
|
|
108
138
|
super().__init__()
|
109
139
|
|
110
|
-
self._log =
|
140
|
+
self._log = _logging.logger_for_object(self)
|
111
141
|
|
112
142
|
self._sys_config = sys_config
|
113
143
|
self._models = models
|
@@ -164,13 +194,17 @@ class TracEngine(_actors.Actor):
|
|
164
194
|
job_result_format: str):
|
165
195
|
|
166
196
|
job_key = _util.object_key(job_config.jobId)
|
197
|
+
job_state = _JobState(job_config.jobId)
|
198
|
+
|
199
|
+
job_state.log.info(f"Job submitted: [{job_key}]")
|
167
200
|
|
168
201
|
result_needed = bool(job_result_dir)
|
169
|
-
result_spec =
|
202
|
+
result_spec = _JobResultSpec(result_needed, job_result_dir, job_result_format)
|
170
203
|
|
171
|
-
|
204
|
+
job_processor = JobProcessor(
|
205
|
+
self._sys_config, self._models, self._storage, job_state.log_provider,
|
206
|
+
job_key, job_config, graph_spec=None)
|
172
207
|
|
173
|
-
job_processor = JobProcessor(self._sys_config, self._models, self._storage, job_key, job_config, result_spec, graph_spec=None)
|
174
208
|
job_actor_id = self.actors().spawn(job_processor)
|
175
209
|
|
176
210
|
job_monitor_success = lambda ctx, key, result: self._notify_callback(key, result, None)
|
@@ -178,24 +212,36 @@ class TracEngine(_actors.Actor):
|
|
178
212
|
job_monitor = JobMonitor(job_key, job_monitor_success, job_monitor_failure)
|
179
213
|
job_monitor_id = self.actors().spawn(job_monitor)
|
180
214
|
|
181
|
-
job_state = _JobState(job_config.jobId)
|
182
215
|
job_state.actor_id = job_actor_id
|
183
216
|
job_state.monitors.append(job_monitor_id)
|
184
217
|
job_state.job_config = job_config
|
218
|
+
job_state.result_spec = result_spec
|
185
219
|
|
186
220
|
self._jobs[job_key] = job_state
|
187
221
|
|
188
222
|
@_actors.Message
|
189
|
-
def submit_child_job(self, child_id: _meta.TagHeader, child_graph: _graph.Graph, monitor_id: _actors.ActorId):
|
223
|
+
def submit_child_job(self, parent_key: str, child_id: _meta.TagHeader, child_graph: _graph.Graph, monitor_id: _actors.ActorId):
|
224
|
+
|
225
|
+
parent_state = self._jobs.get(parent_key)
|
226
|
+
|
227
|
+
# Ignore duplicate messages from the job processor (can happen in unusual error cases)
|
228
|
+
if parent_state is None:
|
229
|
+
self._log.warning(f"Ignoring [submit_child_job] message, parent [{parent_key}] has already completed")
|
230
|
+
return
|
190
231
|
|
191
232
|
child_key = _util.object_key(child_id)
|
192
233
|
|
193
|
-
child_processor = JobProcessor(
|
234
|
+
child_processor = JobProcessor(
|
235
|
+
self._sys_config, self._models, self._storage, parent_state.log_provider,
|
236
|
+
child_key, None, graph_spec=child_graph)
|
237
|
+
|
194
238
|
child_actor_id = self.actors().spawn(child_processor)
|
195
239
|
|
196
|
-
child_state = _JobState(child_id)
|
240
|
+
child_state = _JobState(child_id, parent_state.log_provider)
|
197
241
|
child_state.actor_id = child_actor_id
|
198
242
|
child_state.monitors.append(monitor_id)
|
243
|
+
child_state.parent_key = parent_key
|
244
|
+
child_state.result_spec = _JobResultSpec(False) # Do not output separate results for child jobs
|
199
245
|
|
200
246
|
self._jobs[child_key] = child_state
|
201
247
|
|
@@ -219,9 +265,9 @@ class TracEngine(_actors.Actor):
|
|
219
265
|
self._log.warning(f"Ignoring [job_succeeded] message, job [{job_key}] has already completed")
|
220
266
|
return
|
221
267
|
|
222
|
-
self._log.info(f"Recording job as successful: {job_key}")
|
223
|
-
|
224
268
|
job_state = self._jobs[job_key]
|
269
|
+
job_state.log.info(f"Recording job as successful: {job_key}")
|
270
|
+
|
225
271
|
job_state.job_result = job_result
|
226
272
|
|
227
273
|
for monitor_id in job_state.monitors:
|
@@ -237,11 +283,30 @@ class TracEngine(_actors.Actor):
|
|
237
283
|
self._log.warning(f"Ignoring [job_failed] message, job [{job_key}] has already completed")
|
238
284
|
return
|
239
285
|
|
240
|
-
self._log.error(f"Recording job as failed: {job_key}")
|
241
|
-
|
242
286
|
job_state = self._jobs[job_key]
|
287
|
+
job_state.log.error(f"Recording job as failed: {job_key}")
|
288
|
+
|
243
289
|
job_state.job_error = error
|
244
290
|
|
291
|
+
# Create a failed result so there is something to report
|
292
|
+
result_id = job_state.job_config.resultMapping.get("trac_job_result")
|
293
|
+
|
294
|
+
if result_id is not None:
|
295
|
+
|
296
|
+
job_state.job_result = _cfg.JobResult(
|
297
|
+
jobId=job_state.job_id,
|
298
|
+
statusCode=_meta.JobStatusCode.FAILED,
|
299
|
+
statusMessage=str(error))
|
300
|
+
|
301
|
+
result_def = _meta.ResultDefinition()
|
302
|
+
result_def.jobId = _util.selector_for(job_state.job_id)
|
303
|
+
result_def.statusCode = _meta.JobStatusCode.FAILED
|
304
|
+
|
305
|
+
result_key = _util.object_key(result_id)
|
306
|
+
result_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.RESULT, result=result_def)
|
307
|
+
|
308
|
+
job_state.job_result.results[result_key] = result_obj
|
309
|
+
|
245
310
|
for monitor_id in job_state.monitors:
|
246
311
|
self.actors().send(monitor_id, "job_failed", error)
|
247
312
|
|
@@ -256,6 +321,14 @@ class TracEngine(_actors.Actor):
|
|
256
321
|
|
257
322
|
job_state = self._jobs.get(job_key)
|
258
323
|
|
324
|
+
# Record output metadata if required (not needed for local runs or when using API server)
|
325
|
+
if job_state.parent_key is None and job_state.result_spec.save_result:
|
326
|
+
|
327
|
+
if "trac_job_log_file" in job_state.job_config.resultMapping:
|
328
|
+
self._save_job_log_file(job_key, job_state)
|
329
|
+
|
330
|
+
self._save_job_result(job_key, job_state)
|
331
|
+
|
259
332
|
# Stop any monitors that were created directly by the engine
|
260
333
|
# (Other actors are responsible for stopping their own monitors)
|
261
334
|
while job_state.monitors:
|
@@ -265,9 +338,57 @@ class TracEngine(_actors.Actor):
|
|
265
338
|
self.actors().stop(monitor_id)
|
266
339
|
|
267
340
|
if job_state.actor_id is not None:
|
268
|
-
self.actors().stop(job_state.actor_id
|
341
|
+
self.actors().stop(job_state.actor_id)
|
269
342
|
job_state.actor_id = None
|
270
343
|
|
344
|
+
def _save_job_log_file(self, job_key: str, job_state: _JobState):

    """
    Write the buffered log of a job to file storage and record it in the job result.

    The FILE and STORAGE definitions for the log are added to the job result,
    and the RESULT definition is updated to point at the log file.

    :param job_key: key of the job, used for logging only
    :param job_state: state of the job whose log buffer is saved
    """

    self._log.info(f"Saving job log file for [{job_key}]")

    # Saving log files could go into a separate actor, perhaps a job monitor along with _save_job_result()

    result_mapping = job_state.job_config.resultMapping
    log_file_id = result_mapping["trac_job_log_file"]
    log_storage_id = result_mapping["trac_job_log_file:STORAGE"]

    log_file_type = _meta.FileType("TXT", "text/plain")
    log_file_def, log_storage_def = _graph.GraphBuilder.build_output_file_and_storage(
        "trac_job_log_file", log_file_type,
        self._sys_config, job_state.job_config)

    # The log is written to the first copy of the first incarnation of the data item
    data_item = log_storage_def.dataItems[log_file_def.dataItem]
    storage_copy = data_item.incarnations[0].copies[0]
    file_storage = self._storage.get_file_storage(storage_copy.storageKey)

    with file_storage.write_byte_stream(storage_copy.storagePath) as out_stream:
        out_stream.write(job_state.log_buffer.getbuffer())
        # Stream position after the write is recorded as the file size
        log_file_def.size = out_stream.tell()

    job_result_id = result_mapping["trac_job_result"]
    results = job_state.job_result.results
    job_result_def = results[_util.object_key(job_result_id)].result
    job_result_def.logFileId = _util.selector_for(log_file_id)

    log_file_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.FILE, file=log_file_def)
    log_storage_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=log_storage_def)

    results[_util.object_key(log_file_id)] = log_file_obj
    results[_util.object_key(log_storage_id)] = log_storage_obj
|
374
|
+
|
375
|
+
def _save_job_result(self, job_key: str, job_state: _JobState):

    """
    Write the job result to a file in the result directory, if saving is enabled.

    The file is named job_result_<job_key>.<result_format> and is opened in
    exclusive-create mode, so an existing file with that name raises an error.

    :param job_key: key of the job, used in the log message and the file name
    :param job_state: state of the job, holding the result and the result spec
    """

    self._log.info(f"Saving job result for [{job_key}]")

    # It might be better abstract reporting of results, job status etc., perhaps with a job monitor

    spec = job_state.result_spec

    if not spec.save_result:
        return

    out_format = spec.result_format
    out_name = f"job_result_{job_key}.{out_format}"
    out_path = pathlib.Path(spec.result_dir) / out_name

    with open(out_path, "xt") as out_stream:
        quoted = _cfg_p.ConfigQuoter.quote(job_state.job_result, out_format)
        out_stream.write(quoted)
|
391
|
+
|
271
392
|
def _get_job_info(self, job_key: str, details: bool = False) -> tp.Optional[_cfg.JobResult]:
|
272
393
|
|
273
394
|
job_state = self._jobs.get(job_key)
|
@@ -337,20 +458,24 @@ class JobProcessor(_actors.Actor):
|
|
337
458
|
|
338
459
|
def __init__(
        self, sys_config: _cfg.RuntimeConfig,
        models: _models.ModelLoader, storage: _storage.StorageManager, log_provider: _logging.LogProvider,
        job_key: str, job_config: tp.Optional[_cfg.JobConfig], graph_spec: tp.Optional[_graph.Graph]):

    """
    Set up a processor for one job.

    :param sys_config: runtime configuration
    :param models: model loader, also passed to the function resolver
    :param storage: storage manager, also passed to the function resolver
    :param log_provider: provider for this actor's logger, also passed to the function resolver
    :param job_key: key identifying the job
    :param job_config: job configuration, may be None when graph_spec is supplied
    :param graph_spec: pre-built graph, may be None when job_config is supplied
    :raises EUnexpected: if neither job_config nor graph_spec is supplied
    """

    super().__init__()

    # Either a job config or a pre-built spec is required
    if not (job_config or graph_spec):
        raise _ex.EUnexpected()

    # Job details
    self.job_key = job_key
    self.job_config = job_config
    self.graph_spec = graph_spec

    # Shared runtime resources
    self._sys_config = sys_config
    self._models = models
    self._storage = storage
    self._log_provider = log_provider

    self._resolver = _func.FunctionResolver(models, storage, log_provider)
    self._log = log_provider.logger_for_object(self)
|
354
479
|
|
355
480
|
def on_start(self):
|
356
481
|
|
@@ -360,7 +485,7 @@ class JobProcessor(_actors.Actor):
|
|
360
485
|
if self.graph_spec is not None:
|
361
486
|
self.actors().send(self.actors().id, "build_graph_succeeded", self.graph_spec)
|
362
487
|
else:
|
363
|
-
self.actors().spawn(GraphBuilder(self._sys_config, self.job_config, self.
|
488
|
+
self.actors().spawn(GraphBuilder(self._sys_config, self.job_config, self._log_provider))
|
364
489
|
|
365
490
|
def on_stop(self):
|
366
491
|
|
@@ -398,14 +523,14 @@ class JobProcessor(_actors.Actor):
|
|
398
523
|
# Add all the nodes as pending nodes to start
|
399
524
|
graph.pending_nodes.update(graph.nodes.keys())
|
400
525
|
|
401
|
-
self.actors().spawn(FunctionResolver(self._resolver, graph))
|
526
|
+
self.actors().spawn(FunctionResolver(self._resolver, self._log_provider, graph))
|
402
527
|
if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
|
403
528
|
self.actors().stop(self.actors().sender)
|
404
529
|
|
405
530
|
@_actors.Message
|
406
531
|
def resolve_functions_succeeded(self, graph: _EngineContext):
|
407
532
|
|
408
|
-
self.actors().spawn(GraphProcessor(graph, self._resolver))
|
533
|
+
self.actors().spawn(GraphProcessor(graph, self._resolver, self._log_provider))
|
409
534
|
if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
|
410
535
|
self.actors().stop(self.actors().sender)
|
411
536
|
|
@@ -428,12 +553,12 @@ class GraphBuilder(_actors.Actor):
|
|
428
553
|
GraphBuilder is a worker (actor) to wrap the GraphBuilder logic from graph_builder.py
|
429
554
|
"""
|
430
555
|
|
431
|
-
def __init__(self, sys_config: _cfg.RuntimeConfig, job_config: _cfg.JobConfig,
|
556
|
+
def __init__(self, sys_config: _cfg.RuntimeConfig, job_config: _cfg.JobConfig, log_provider: _logging.LogProvider):

    """
    Store the configuration used to build the graph and create a logger for this actor.

    :param sys_config: runtime configuration, kept as self.sys_config
    :param job_config: job configuration, kept as self.job_config
    :param log_provider: provider used to create this actor's logger
    """

    super().__init__()

    self.sys_config, self.job_config = sys_config, job_config
    self._log = log_provider.logger_for_object(self)
|
437
562
|
|
438
563
|
def on_start(self):
|
439
564
|
self.build_graph(self, self.job_config)
|
@@ -443,7 +568,7 @@ class GraphBuilder(_actors.Actor):
|
|
443
568
|
|
444
569
|
self._log.info("Building execution graph")
|
445
570
|
|
446
|
-
graph_builder = _graph.GraphBuilder(self.sys_config, job_config
|
571
|
+
graph_builder = _graph.GraphBuilder(self.sys_config, job_config)
|
447
572
|
graph_spec = graph_builder.build_job(job_config.job)
|
448
573
|
|
449
574
|
self.actors().reply("build_graph_succeeded", graph_spec)
|
@@ -452,14 +577,14 @@ class GraphBuilder(_actors.Actor):
|
|
452
577
|
class FunctionResolver(_actors.Actor):
|
453
578
|
|
454
579
|
"""
|
455
|
-
|
580
|
+
FunctionResolver is a worker (actors) to wrap the FunctionResolver logic in functions.py
|
456
581
|
"""
|
457
582
|
|
458
|
-
def __init__(self, resolver: _func.FunctionResolver, graph: _EngineContext):
|
583
|
+
def __init__(self, resolver: _func.FunctionResolver, log_provider: _logging.LogProvider, graph: _EngineContext):

    """
    Store the resolver and graph for this actor and create its logger.

    :param resolver: function resolver from functions.py, kept as self._resolver
    :param log_provider: provider used to create this actor's logger
    :param graph: engine context, kept as self.graph
    """

    super().__init__()

    self._resolver = resolver
    self.graph = graph
    self._log = log_provider.logger_for_object(self)
|
463
588
|
|
464
589
|
def on_start(self):
|
465
590
|
self.resolve_functions(self, self.graph)
|
@@ -488,13 +613,15 @@ class GraphProcessor(_actors.Actor):
|
|
488
613
|
Once all running nodes are stopped, an error is reported to the parent
|
489
614
|
"""
|
490
615
|
|
491
|
-
def __init__(self, graph: _EngineContext, resolver: _func.FunctionResolver):
|
616
|
+
def __init__(self, graph: _EngineContext, resolver: _func.FunctionResolver, log_provider: _logging.LogProvider):

    """
    Set up the processor for a graph, with no node processors registered yet.

    :param graph: engine context to process, its root_id is kept as self.root_id_
    :param resolver: function resolver, kept as self._resolver
    :param log_provider: provider for this actor's logger and for the graph and node loggers
    """

    super().__init__()

    self.graph = graph
    self.root_id_ = graph.root_id
    self._resolver = resolver

    # Node processor actors, keyed by node ID
    self.processors: tp.Dict[NodeId, _actors.ActorId] = {}

    # All loggers are created from the same provider
    self._log = log_provider.logger_for_object(self)
    self._graph_logger = GraphLogger(log_provider)
    self._node_logger = NodeLogger(log_provider)
|
498
625
|
|
499
626
|
def on_start(self):
|
500
627
|
|
@@ -515,7 +642,7 @@ class GraphProcessor(_actors.Actor):
|
|
515
642
|
for node_id, node in graph.nodes.items():
|
516
643
|
if node_id in graph.succeeded_nodes and not self._is_required_node(node, graph):
|
517
644
|
node = processed_graph.nodes.pop(node_id)
|
518
|
-
|
645
|
+
self._node_logger.log_node_evict(node)
|
519
646
|
del node
|
520
647
|
|
521
648
|
pending_nodes = cp.copy(graph.pending_nodes)
|
@@ -540,13 +667,13 @@ class GraphProcessor(_actors.Actor):
|
|
540
667
|
# There is scope for a much more sophisticated approach, with prioritized scheduling
|
541
668
|
|
542
669
|
if isinstance(node.node, _graph.ChildJobNode):
|
543
|
-
processor = ChildJobNodeProcessor(processed_graph, node)
|
670
|
+
processor = ChildJobNodeProcessor(processed_graph, node, self._node_logger)
|
544
671
|
elif isinstance(node.node, _graph.RunModelNode) or isinstance(node.node, _graph.ImportModelNode):
|
545
|
-
processor = ModelNodeProcessor(processed_graph, node)
|
672
|
+
processor = ModelNodeProcessor(processed_graph, node, self._node_logger)
|
546
673
|
elif isinstance(node.node, _graph.LoadDataNode) or isinstance(node.node, _graph.SaveDataNode):
|
547
|
-
processor = DataNodeProcessor(processed_graph, node)
|
674
|
+
processor = DataNodeProcessor(processed_graph, node, self._node_logger)
|
548
675
|
else:
|
549
|
-
processor = NodeProcessor(processed_graph, node)
|
676
|
+
processor = NodeProcessor(processed_graph, node, self._node_logger)
|
550
677
|
|
551
678
|
# New nodes can be launched with the updated graph
|
552
679
|
# Anything that was pruned is not needed by the new node
|
@@ -614,7 +741,7 @@ class GraphProcessor(_actors.Actor):
|
|
614
741
|
new_graph.pending_nodes = cp.copy(new_graph.pending_nodes)
|
615
742
|
|
616
743
|
for node_id, node in new_nodes.items():
|
617
|
-
|
744
|
+
self._graph_logger.log_node_add(node)
|
618
745
|
node_func = self._resolver.resolve_node(node)
|
619
746
|
new_node = _EngineNode(node, node_func)
|
620
747
|
new_graph.nodes[node_id] = new_node
|
@@ -624,7 +751,7 @@ class GraphProcessor(_actors.Actor):
|
|
624
751
|
engine_node = cp.copy(new_graph.nodes[node_id])
|
625
752
|
engine_node.dependencies = cp.copy(engine_node.dependencies)
|
626
753
|
for dep in deps:
|
627
|
-
|
754
|
+
self._graph_logger.log_dependency_add(node_id, dep.node_id)
|
628
755
|
engine_node.dependencies[dep.node_id] = dep.dependency_type
|
629
756
|
new_graph.nodes[node_id] = engine_node
|
630
757
|
|
@@ -784,11 +911,12 @@ class NodeProcessor(_actors.Actor):
|
|
784
911
|
|
785
912
|
__NONE_TYPE = type(None)
|
786
913
|
|
787
|
-
def __init__(self, graph: _EngineContext, node: _EngineNode):
|
914
|
+
def __init__(self, graph: _EngineContext, node: _EngineNode, node_logger: "NodeLogger"):

    """
    Store the graph, the node to process and the logger used to report on it.

    :param graph: engine context the node belongs to
    :param node: engine node to process, its ID is kept as self.node_id
    :param node_logger: logger used to report node activity
    """

    super().__init__()

    self.node_logger = node_logger
    self.graph = graph
    self.node = node
    self.node_id = node.node.id
|
792
920
|
|
793
921
|
|
794
922
|
def on_start(self):
|
@@ -823,7 +951,7 @@ class NodeProcessor(_actors.Actor):
|
|
823
951
|
|
824
952
|
try:
|
825
953
|
|
826
|
-
|
954
|
+
self.node_logger.log_node_start(self.node)
|
827
955
|
|
828
956
|
# Context contains only node states available when the context is set up
|
829
957
|
ctx = NodeContextImpl(self.graph.nodes)
|
@@ -836,13 +964,13 @@ class NodeProcessor(_actors.Actor):
|
|
836
964
|
|
837
965
|
self._check_result_type(result)
|
838
966
|
|
839
|
-
|
967
|
+
self.node_logger.log_node_succeeded(self.node)
|
840
968
|
|
841
969
|
self.actors().send_parent("node_succeeded", self.node_id, result)
|
842
970
|
|
843
971
|
except Exception as e:
|
844
972
|
|
845
|
-
|
973
|
+
self.node_logger.log_node_failed(self.node, e)
|
846
974
|
|
847
975
|
self.actors().send_parent("node_failed", self.node_id, e)
|
848
976
|
|
@@ -898,28 +1026,29 @@ class NodeProcessor(_actors.Actor):
|
|
898
1026
|
|
899
1027
|
class ModelNodeProcessor(NodeProcessor):

    # Node processor created by GraphProcessor for RunModelNode and ImportModelNode,
    # the constructor passes all its arguments straight through to NodeProcessor

    def __init__(self, graph: _EngineContext, node: _EngineNode, node_logger: "NodeLogger"):
        super().__init__(graph=graph, node=node, node_logger=node_logger)
|
903
1031
|
|
904
1032
|
|
905
1033
|
class DataNodeProcessor(NodeProcessor):

    # Node processor created by GraphProcessor for LoadDataNode and SaveDataNode,
    # the constructor passes all its arguments straight through to NodeProcessor

    def __init__(self, graph: _EngineContext, node: _EngineNode, node_logger: "NodeLogger"):
        super().__init__(graph=graph, node=node, node_logger=node_logger)
|
909
1037
|
|
910
1038
|
|
911
1039
|
class ChildJobNodeProcessor(NodeProcessor):
|
912
1040
|
|
913
|
-
def __init__(self, graph: _EngineContext, node: _EngineNode):
|
914
|
-
super().__init__(graph, node)
|
1041
|
+
def __init__(self, graph: _EngineContext, node: _EngineNode, node_logger: "NodeLogger"):
|
1042
|
+
super().__init__(graph, node, node_logger)
|
915
1043
|
|
916
1044
|
@_actors.Message
|
917
1045
|
def evaluate_node(self):
|
918
1046
|
|
919
|
-
|
1047
|
+
self.node_logger.log_node_start(self.node)
|
920
1048
|
|
921
1049
|
job_id = self.node.node.job_id # noqa
|
922
1050
|
job_key = _util.object_key(job_id)
|
1051
|
+
parent_key = self.graph.job_key
|
923
1052
|
|
924
1053
|
node_id = self.actors().id
|
925
1054
|
|
@@ -934,21 +1063,21 @@ class ChildJobNodeProcessor(NodeProcessor):
|
|
934
1063
|
|
935
1064
|
graph_spec: _graph.Graph = self.node.node.graph # noqa
|
936
1065
|
|
937
|
-
self.actors().send(self.graph.engine_id, "submit_child_job", job_id, graph_spec, monitor_id)
|
1066
|
+
self.actors().send(self.graph.engine_id, "submit_child_job", parent_key, job_id, graph_spec, monitor_id)
|
938
1067
|
|
939
1068
|
@_actors.Message
|
940
1069
|
def child_job_succeeded(self, job_result: _cfg.JobResult):
|
941
1070
|
|
942
1071
|
self._check_result_type(job_result)
|
943
1072
|
|
944
|
-
|
1073
|
+
self.node_logger.log_node_succeeded(self.node)
|
945
1074
|
|
946
1075
|
self.actors().send_parent("node_succeeded", self.node_id, job_result)
|
947
1076
|
|
948
1077
|
@_actors.Message
|
949
1078
|
def child_job_failed(self, job_error: Exception):
|
950
1079
|
|
951
|
-
|
1080
|
+
self.node_logger.log_node_failed(self.node, job_error)
|
952
1081
|
|
953
1082
|
self.actors().send_parent("node_failed", self.node_id, job_error)
|
954
1083
|
|
@@ -959,23 +1088,22 @@ class GraphLogger:
|
|
959
1088
|
Log the activity of the GraphProcessor
|
960
1089
|
"""
|
961
1090
|
|
962
|
-
|
1091
|
+
def __init__(self, log_provider: _logging.LogProvider):

    """
    Create the logger used for graph messages.

    :param log_provider: provider the logger is obtained from
    """

    # Messages are logged under the GraphProcessor class, not under GraphLogger
    graph_log = log_provider.logger_for_class(GraphProcessor)
    self._log = graph_log
|
963
1093
|
|
964
|
-
|
965
|
-
def log_node_add(cls, node: _graph.Node):
|
1094
|
+
def log_node_add(self, node: _graph.Node):

    """
    Log that a node has been added to the graph.

    :param node: the graph node that was added
    """

    name, ns = node.id.name, node.id.namespace

    message = f"ADD {self._func_type(node)} [{name}] / {ns}"
    self._log.info(message)
|
971
1100
|
|
972
|
-
|
973
|
-
def log_dependency_add(cls, node_id: NodeId, dep_id: NodeId):
|
1101
|
+
def log_dependency_add(self, node_id: NodeId, dep_id: NodeId):

    """
    Log that a dependency has been added between two nodes.

    The namespace is printed once when both nodes share it, otherwise it is printed for each node.

    :param node_id: ID of the node that gains the dependency
    :param dep_id: ID of the node it now depends on
    """

    same_namespace = node_id.namespace == dep_id.namespace

    if same_namespace:
        message = f"ADD DEPENDENCY [{node_id.name}] -> [{dep_id.name}] / {node_id.namespace}"
    else:
        message = f"ADD DEPENDENCY [{node_id.name}] / {node_id.namespace} -> [{dep_id.name}] / {dep_id.namespace}"

    self._log.info(message)
|
979
1107
|
|
980
1108
|
@classmethod
|
981
1109
|
def _func_type(cls, node: _graph.Node):
|
@@ -992,7 +1120,8 @@ class NodeLogger:
|
|
992
1120
|
|
993
1121
|
# Separate out the logic for logging nodes, so the NodeProcessor itself stays a bit cleaner
|
994
1122
|
|
995
|
-
|
1123
|
+
def __init__(self, log_provider: _logging.LogProvider):

    """
    Create the logger used for node messages.

    :param log_provider: provider the logger is obtained from
    """

    # Messages are logged under the NodeProcessor class, not under NodeLogger
    node_log = log_provider.logger_for_class(NodeProcessor)
    self._log = node_log
|
996
1125
|
|
997
1126
|
class LoggingType(enum.Enum):
|
998
1127
|
DEFAULT = 0
|
@@ -1001,81 +1130,75 @@ class NodeLogger:
|
|
1001
1130
|
SIMPLE_MAPPING = 3
|
1002
1131
|
MODEL = 4
|
1003
1132
|
|
1004
|
-
|
1005
|
-
def log_node_start(cls, node: _EngineNode):
|
1133
|
+
def log_node_start(self, node: _EngineNode):

    """
    Log the start of a node, with the message chosen by the node's logging type.

    Static values are logged as SET, simple mappings as MAP and everything else as START.

    :param node: the engine node that is starting
    """

    kind = self._logging_type(node)
    name = node.node.id.name
    ns = node.node.id.namespace

    if kind == self.LoggingType.STATIC_VALUE:
        message = f"SET {self._value_type(node)} [{name}] / {ns}"

    elif kind == self.LoggingType.SIMPLE_MAPPING:
        message = f"MAP {self._value_type(node)} [{self._mapping_source(node)}] -> [{name}] / {ns}"

    else:
        message = f"START {self._func_type(node)} [{name}] / {ns}"

    self._log.info(message)
|
1019
1147
|
|
1020
|
-
|
1021
|
-
def log_node_succeeded(cls, node: _EngineNode):
|
1148
|
+
def log_node_succeeded(self, node: _EngineNode):

    """
    Log the successful completion of a node.

    Nothing is logged for static values and simple mappings. Push / pop nodes
    and model nodes log extra details before the DONE message.

    :param node: the engine node that succeeded
    """

    kind = self._logging_type(node)
    name = node.node.id.name
    ns = node.node.id.namespace

    quiet_kinds = (self.LoggingType.STATIC_VALUE, self.LoggingType.SIMPLE_MAPPING)

    if kind in quiet_kinds:
        return

    if kind == self.LoggingType.PUSH_POP:
        self._log_push_pop_node_details(node.node)  # noqa
    elif kind == self.LoggingType.MODEL:
        self._log_model_node_details(node.node)  # noqa

    self._log.info(f"DONE {self._func_type(node)} [{name}] / {ns}")
|
1037
1164
|
|
1038
|
-
|
1039
|
-
def log_node_failed(cls, node: _EngineNode, e: Exception):
|
1165
|
+
def log_node_failed(self, node: _EngineNode, e: Exception):

    """
    Log the failure of a node, followed by the error that caused it.

    :param node: the engine node that failed
    :param e: the error that caused the failure
    """

    node_name = node.node.id.name
    namespace = node.node.id.namespace

    self._log.error(f"FAILED {self._func_type(node)} [{node_name}] / {namespace}")

    # Pass the error explicitly as exc_info. By default exception() reports the exception
    # currently being handled, but this method is also called outside of an except block
    # (from ChildJobNodeProcessor.child_job_failed), where there is none to report
    # NOTE(review): assumes _logging.Logger follows the standard library Logger API - confirm
    self._log.exception(e, exc_info=e)
|
1046
1172
|
|
1047
|
-
|
1048
|
-
def log_node_evict(cls, node: _EngineNode):
|
1173
|
+
def log_node_evict(self, node: _EngineNode):

    """
    Log that a node has been evicted from the graph.

    Nothing is logged for static values and simple mappings.

    :param node: the engine node that was evicted
    """

    kind = self._logging_type(node)
    name = node.node.id.name
    ns = node.node.id.namespace

    quiet_kinds = (self.LoggingType.STATIC_VALUE, self.LoggingType.SIMPLE_MAPPING)

    if kind not in quiet_kinds:
        self._log.info(f"EVICT {self._func_type(node)} [{name}] / {ns}")
|
1058
1183
|
|
1059
|
-
|
1060
|
-
def _log_push_pop_node_details(cls, node: tp.Union[_graph.ContextPushNode, _graph.ContextPopNode]):
|
1184
|
+
def _log_push_pop_node_details(self, node: tp.Union[_graph.ContextPushNode, _graph.ContextPopNode]):

    """
    Log one line for each item mapped by a context push or pop node.

    :param node: the push or pop node, whose mapping is iterated as (inner ID, outer ID) pairs
    """

    # Anything that is not a push node is reported as a pop
    if isinstance(node, _graph.ContextPushNode):
        action, arrow = "PUSH", "->"
    else:
        action, arrow = "POP", "<-"

    for inner, outer in node.mapping.items():
        type_name = self._type_str(inner.result_type)
        self._log.info(f"{action} {type_name} [{outer.name}] {arrow} [{inner.name}] / {node.id.namespace}")
|
1069
1193
|
|
1070
|
-
|
1071
|
-
def _log_model_node_details(cls, node: _graph.RunModelNode):
|
1194
|
+
def _log_model_node_details(self, node: _graph.RunModelNode):

    """
    Log one RESULT line for each output of a model node.

    :param node: the model node, whose model_def.outputs are listed
    """

    # Every output is reported with the same type, so the type string is computed
    # once here and reused (the result of this call was previously discarded)
    result_type = self._type_str(_data.DataView)

    for output in node.model_def.outputs:
        msg = f"RESULT {result_type} [{output}] / {node.bundle_namespace}"
        self._log.info(msg)
|
1079
1202
|
|
1080
1203
|
@classmethod
|
1081
1204
|
def _logging_type(cls, node: _EngineNode) -> LoggingType:
|