tracdap-runtime 0.7.1__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_impl/core/__init__.py +14 -0
- tracdap/rt/_impl/{config_parser.py → core/config_parser.py} +36 -19
- tracdap/rt/_impl/{data.py → core/data.py} +136 -32
- tracdap/rt/_impl/core/logging.py +195 -0
- tracdap/rt/_impl/{models.py → core/models.py} +15 -12
- tracdap/rt/_impl/{repos.py → core/repos.py} +12 -3
- tracdap/rt/_impl/{schemas.py → core/schemas.py} +5 -5
- tracdap/rt/_impl/{shim.py → core/shim.py} +5 -4
- tracdap/rt/_impl/{storage.py → core/storage.py} +21 -10
- tracdap/rt/_impl/core/struct.py +547 -0
- tracdap/rt/_impl/{util.py → core/util.py} +1 -111
- tracdap/rt/_impl/{validation.py → core/validation.py} +99 -31
- tracdap/rt/_impl/exec/__init__.py +14 -0
- tracdap/rt/{_exec → _impl/exec}/actors.py +12 -14
- tracdap/rt/{_exec → _impl/exec}/context.py +228 -82
- tracdap/rt/{_exec → _impl/exec}/dev_mode.py +163 -81
- tracdap/rt/{_exec → _impl/exec}/engine.py +230 -105
- tracdap/rt/{_exec → _impl/exec}/functions.py +191 -100
- tracdap/rt/{_exec → _impl/exec}/graph.py +24 -36
- tracdap/rt/{_exec → _impl/exec}/graph_builder.py +252 -115
- tracdap/rt/_impl/grpc/codec.py +1 -1
- tracdap/rt/{_exec → _impl/grpc}/server.py +7 -6
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +3 -3
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2_grpc.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +40 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.pyi +62 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +32 -20
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +48 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +4 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.pyi +8 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +65 -63
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +16 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +28 -26
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +4 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.pyi +6 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +9 -7
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.pyi +12 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +18 -5
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +42 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.py → storage_pb2.py} +4 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +1 -1
- tracdap/rt/{_exec → _impl}/runtime.py +32 -18
- tracdap/rt/_impl/static_api.py +65 -37
- tracdap/rt/_plugins/format_csv.py +1 -1
- tracdap/rt/_plugins/repo_git.py +56 -11
- tracdap/rt/_plugins/storage_sql.py +1 -1
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/__init__.py +5 -24
- tracdap/rt/api/constants.py +57 -0
- tracdap/rt/api/experimental.py +32 -0
- tracdap/rt/api/hook.py +26 -7
- tracdap/rt/api/model_api.py +16 -0
- tracdap/rt/api/static_api.py +265 -127
- tracdap/rt/config/__init__.py +11 -11
- tracdap/rt/config/common.py +2 -26
- tracdap/rt/config/dynamic.py +28 -0
- tracdap/rt/config/platform.py +17 -31
- tracdap/rt/config/runtime.py +2 -0
- tracdap/rt/ext/embed.py +2 -2
- tracdap/rt/ext/plugins.py +3 -3
- tracdap/rt/launch/launch.py +12 -14
- tracdap/rt/metadata/__init__.py +28 -18
- tracdap/rt/metadata/config.py +95 -0
- tracdap/rt/metadata/data.py +40 -0
- tracdap/rt/metadata/file.py +10 -0
- tracdap/rt/metadata/job.py +16 -0
- tracdap/rt/metadata/model.py +12 -2
- tracdap/rt/metadata/object.py +9 -1
- tracdap/rt/metadata/object_id.py +6 -0
- tracdap/rt/metadata/resource.py +41 -1
- {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info}/METADATA +23 -17
- tracdap_runtime-0.8.0.dist-info/RECORD +129 -0
- {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info}/WHEEL +1 -1
- tracdap/rt/_exec/__init__.py +0 -0
- tracdap_runtime-0.7.1.dist-info/RECORD +0 -121
- /tracdap/rt/_impl/{guard_rails.py → core/guard_rails.py} +0 -0
- /tracdap/rt/_impl/{type_system.py → core/type_system.py} +0 -0
- /tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.pyi → storage_pb2.pyi} +0 -0
- /tracdap/rt/metadata/{stoarge.py → storage.py} +0 -0
- {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info/licenses}/LICENSE +0 -0
- {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info}/top_level.txt +0 -0
@@ -16,18 +16,22 @@
|
|
16
16
|
import copy as cp
|
17
17
|
import dataclasses as dc
|
18
18
|
import enum
|
19
|
+
import io
|
20
|
+
import pathlib
|
19
21
|
import typing as tp
|
20
22
|
|
21
23
|
import tracdap.rt.metadata as _meta
|
22
24
|
import tracdap.rt.config as _cfg
|
23
25
|
import tracdap.rt.exceptions as _ex
|
24
|
-
import tracdap.rt.
|
25
|
-
import tracdap.rt.
|
26
|
-
import tracdap.rt.
|
27
|
-
import tracdap.rt._impl.
|
28
|
-
import tracdap.rt._impl.data as _data
|
29
|
-
import tracdap.rt._impl.
|
30
|
-
import tracdap.rt._impl.
|
26
|
+
import tracdap.rt._impl.exec.actors as _actors
|
27
|
+
import tracdap.rt._impl.exec.graph_builder as _graph
|
28
|
+
import tracdap.rt._impl.exec.functions as _func
|
29
|
+
import tracdap.rt._impl.core.config_parser as _cfg_p
|
30
|
+
import tracdap.rt._impl.core.data as _data
|
31
|
+
import tracdap.rt._impl.core.logging as _logging
|
32
|
+
import tracdap.rt._impl.core.models as _models
|
33
|
+
import tracdap.rt._impl.core.storage as _storage
|
34
|
+
import tracdap.rt._impl.core.util as _util
|
31
35
|
|
32
36
|
from .graph import NodeId
|
33
37
|
|
@@ -79,18 +83,44 @@ class _EngineContext:
|
|
79
83
|
pending_nodes, active_nodes, succeeded_nodes, failed_nodes)
|
80
84
|
|
81
85
|
|
86
|
+
@dc.dataclass
class _JobResultSpec:

    """
    Describes whether and where a job result file should be written

    The engine reads these fields in _save_job_result() to decide whether
    a result file is needed and to build its path.
    """

    # True when a result file should be written for the job
    save_result: bool = False

    # Directory that receives the result file (None when no result is saved)
    result_dir: tp.Optional[tp.Union[str, pathlib.Path]] = None

    # Format passed to the config quoter, also used as the result file extension
    result_format: tp.Optional[str] = None
|
92
|
+
|
93
|
+
|
82
94
|
@dc.dataclass
class _JobState:

    """
    State the engine keeps for a single job, stored in TracEngine._jobs by job key

    :param job_id: Tag header identifying the job
    :param log_init: Optional log provider to reuse for this job (init-only, not stored as a field).
        Child jobs pass the log provider of their parent. When it is not supplied,
        a new in-memory log buffer and a job log provider writing to it are created.
    """

    job_id: _meta.TagHeader
    log_init: dc.InitVar[tp.Optional[_logging.LogProvider]] = None

    # Actor running the job and the monitors to notify about its outcome
    actor_id: _actors.ActorId = None
    monitors: tp.List[_actors.ActorId] = dc.field(default_factory=list)

    job_config: _cfg.JobConfig = None
    job_result: _cfg.JobResult = None
    job_error: Exception = None

    # Key of the parent job, None for jobs that are not child jobs
    parent_key: str = None
    result_spec: _JobResultSpec = None

    # Logging resources, populated in __post_init__
    log_buffer: io.BytesIO = None
    log_provider: _logging.LogProvider = None
    log: _logging.Logger = None

    def __post_init__(self, log_init):

        # Test the init-only argument, not self.log: self.log is still None at this point,
        # so testing it would always discard a supplied provider and create a new buffer
        if isinstance(log_init, _logging.LogProvider):
            self.log_provider = log_init
        else:
            self.log_buffer = io.BytesIO()
            self.log_provider = _logging.job_log_provider(self.log_buffer)

        self.log = self.log_provider.logger_for_class(TracEngine)
|
123
|
+
|
94
124
|
|
95
125
|
class TracEngine(_actors.Actor):
|
96
126
|
|
@@ -107,7 +137,7 @@ class TracEngine(_actors.Actor):
|
|
107
137
|
|
108
138
|
super().__init__()
|
109
139
|
|
110
|
-
self._log =
|
140
|
+
self._log = _logging.logger_for_object(self)
|
111
141
|
|
112
142
|
self._sys_config = sys_config
|
113
143
|
self._models = models
|
@@ -164,13 +194,17 @@ class TracEngine(_actors.Actor):
|
|
164
194
|
job_result_format: str):
|
165
195
|
|
166
196
|
job_key = _util.object_key(job_config.jobId)
|
197
|
+
job_state = _JobState(job_config.jobId)
|
198
|
+
|
199
|
+
job_state.log.info(f"Job submitted: [{job_key}]")
|
167
200
|
|
168
201
|
result_needed = bool(job_result_dir)
|
169
|
-
result_spec =
|
202
|
+
result_spec = _JobResultSpec(result_needed, job_result_dir, job_result_format)
|
170
203
|
|
171
|
-
|
204
|
+
job_processor = JobProcessor(
|
205
|
+
self._sys_config, self._models, self._storage, job_state.log_provider,
|
206
|
+
job_key, job_config, graph_spec=None)
|
172
207
|
|
173
|
-
job_processor = JobProcessor(self._models, self._storage, job_key, job_config, result_spec, graph_spec=None)
|
174
208
|
job_actor_id = self.actors().spawn(job_processor)
|
175
209
|
|
176
210
|
job_monitor_success = lambda ctx, key, result: self._notify_callback(key, result, None)
|
@@ -178,24 +212,36 @@ class TracEngine(_actors.Actor):
|
|
178
212
|
job_monitor = JobMonitor(job_key, job_monitor_success, job_monitor_failure)
|
179
213
|
job_monitor_id = self.actors().spawn(job_monitor)
|
180
214
|
|
181
|
-
job_state = _JobState(job_config.jobId)
|
182
215
|
job_state.actor_id = job_actor_id
|
183
216
|
job_state.monitors.append(job_monitor_id)
|
184
217
|
job_state.job_config = job_config
|
218
|
+
job_state.result_spec = result_spec
|
185
219
|
|
186
220
|
self._jobs[job_key] = job_state
|
187
221
|
|
188
222
|
@_actors.Message
|
189
|
-
def submit_child_job(self, child_id: _meta.TagHeader, child_graph: _graph.Graph, monitor_id: _actors.ActorId):
|
223
|
+
def submit_child_job(self, parent_key: str, child_id: _meta.TagHeader, child_graph: _graph.Graph, monitor_id: _actors.ActorId):
|
224
|
+
|
225
|
+
parent_state = self._jobs.get(parent_key)
|
226
|
+
|
227
|
+
# Ignore duplicate messages from the job processor (can happen in unusual error cases)
|
228
|
+
if parent_state is None:
|
229
|
+
self._log.warning(f"Ignoring [submit_child_job] message, parent [{parent_key}] has already completed")
|
230
|
+
return
|
190
231
|
|
191
232
|
child_key = _util.object_key(child_id)
|
192
233
|
|
193
|
-
child_processor = JobProcessor(
|
234
|
+
child_processor = JobProcessor(
|
235
|
+
self._sys_config, self._models, self._storage, parent_state.log_provider,
|
236
|
+
child_key, None, graph_spec=child_graph)
|
237
|
+
|
194
238
|
child_actor_id = self.actors().spawn(child_processor)
|
195
239
|
|
196
|
-
child_state = _JobState(child_id)
|
240
|
+
child_state = _JobState(child_id, parent_state.log_provider)
|
197
241
|
child_state.actor_id = child_actor_id
|
198
242
|
child_state.monitors.append(monitor_id)
|
243
|
+
child_state.parent_key = parent_key
|
244
|
+
child_state.result_spec = _JobResultSpec(False) # Do not output separate results for child jobs
|
199
245
|
|
200
246
|
self._jobs[child_key] = child_state
|
201
247
|
|
@@ -219,9 +265,9 @@ class TracEngine(_actors.Actor):
|
|
219
265
|
self._log.warning(f"Ignoring [job_succeeded] message, job [{job_key}] has already completed")
|
220
266
|
return
|
221
267
|
|
222
|
-
self._log.info(f"Recording job as successful: {job_key}")
|
223
|
-
|
224
268
|
job_state = self._jobs[job_key]
|
269
|
+
job_state.log.info(f"Recording job as successful: {job_key}")
|
270
|
+
|
225
271
|
job_state.job_result = job_result
|
226
272
|
|
227
273
|
for monitor_id in job_state.monitors:
|
@@ -237,11 +283,30 @@ class TracEngine(_actors.Actor):
|
|
237
283
|
self._log.warning(f"Ignoring [job_failed] message, job [{job_key}] has already completed")
|
238
284
|
return
|
239
285
|
|
240
|
-
self._log.error(f"Recording job as failed: {job_key}")
|
241
|
-
|
242
286
|
job_state = self._jobs[job_key]
|
287
|
+
job_state.log.error(f"Recording job as failed: {job_key}")
|
288
|
+
|
243
289
|
job_state.job_error = error
|
244
290
|
|
291
|
+
# Create a failed result so there is something to report
|
292
|
+
result_id = job_state.job_config.resultMapping.get("trac_job_result")
|
293
|
+
|
294
|
+
if result_id is not None:
|
295
|
+
|
296
|
+
job_state.job_result = _cfg.JobResult(
|
297
|
+
jobId=job_state.job_id,
|
298
|
+
statusCode=_meta.JobStatusCode.FAILED,
|
299
|
+
statusMessage=str(error))
|
300
|
+
|
301
|
+
result_def = _meta.ResultDefinition()
|
302
|
+
result_def.jobId = _util.selector_for(job_state.job_id)
|
303
|
+
result_def.statusCode = _meta.JobStatusCode.FAILED
|
304
|
+
|
305
|
+
result_key = _util.object_key(result_id)
|
306
|
+
result_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.RESULT, result=result_def)
|
307
|
+
|
308
|
+
job_state.job_result.results[result_key] = result_obj
|
309
|
+
|
245
310
|
for monitor_id in job_state.monitors:
|
246
311
|
self.actors().send(monitor_id, "job_failed", error)
|
247
312
|
|
@@ -256,6 +321,14 @@ class TracEngine(_actors.Actor):
|
|
256
321
|
|
257
322
|
job_state = self._jobs.get(job_key)
|
258
323
|
|
324
|
+
# Record output metadata if required (not needed for local runs or when using API server)
|
325
|
+
if job_state.parent_key is None and job_state.result_spec.save_result:
|
326
|
+
|
327
|
+
if "trac_job_log_file" in job_state.job_config.resultMapping:
|
328
|
+
self._save_job_log_file(job_key, job_state)
|
329
|
+
|
330
|
+
self._save_job_result(job_key, job_state)
|
331
|
+
|
259
332
|
# Stop any monitors that were created directly by the engine
|
260
333
|
# (Other actors are responsible for stopping their own monitors)
|
261
334
|
while job_state.monitors:
|
@@ -265,9 +338,57 @@ class TracEngine(_actors.Actor):
|
|
265
338
|
self.actors().stop(monitor_id)
|
266
339
|
|
267
340
|
if job_state.actor_id is not None:
|
268
|
-
self.actors().stop(job_state.actor_id
|
341
|
+
self.actors().stop(job_state.actor_id)
|
269
342
|
job_state.actor_id = None
|
270
343
|
|
344
|
+
def _save_job_log_file(self, job_key: str, job_state: _JobState):

    """
    Write the buffered log of a job to file storage and add it to the job result

    The bytes held in job_state.log_buffer are written to the storage location built
    for the "trac_job_log_file" output. The FILE and STORAGE objects for the log are
    added to job_state.job_result, and the RESULT object registered under
    "trac_job_result" is updated to refer to the log file.

    :param job_key: Key of the job, used here only in the log message
    :param job_state: State of the job whose log is saved, its job result is updated in place
    """

    self._log.info(f"Saving job log file for [{job_key}]")

    # Saving log files could go into a separate actor, perhaps a job monitor along with _save_job_result()

    result_mapping = job_state.job_config.resultMapping
    log_file_id = result_mapping["trac_job_log_file"]
    log_storage_id = result_mapping["trac_job_log_file:STORAGE"]

    log_file_type = _meta.FileType("TXT", "text/plain")
    log_file_def, log_storage_def = _graph.GraphBuilder.build_output_file_and_storage(
        "trac_job_log_file", log_file_type,
        self._sys_config, job_state.job_config)

    # The storage location is taken from the first copy of the first incarnation
    log_data_item = log_storage_def.dataItems[log_file_def.dataItem]
    log_copy = log_data_item.incarnations[0].copies[0]
    file_storage = self._storage.get_file_storage(log_copy.storageKey)

    with file_storage.write_byte_stream(log_copy.storagePath) as log_stream:
        log_stream.write(job_state.log_buffer.getbuffer())
        # The stream position after writing is recorded as the file size
        log_file_def.size = log_stream.tell()

    # The RESULT object must already be present in the job result, it is updated in place
    result_id = result_mapping["trac_job_result"]
    result_def = job_state.job_result.results[_util.object_key(result_id)].result
    result_def.logFileId = _util.selector_for(log_file_id)

    log_file_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.FILE, file=log_file_def)
    log_storage_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=log_storage_def)

    job_state.job_result.results[_util.object_key(log_file_id)] = log_file_obj
    job_state.job_result.results[_util.object_key(log_storage_id)] = log_storage_obj
|
374
|
+
|
375
|
+
def _save_job_result(self, job_key: str, job_state: _JobState):

    """
    Write the job result to a file in the result directory, if a result file was requested

    The file is named job_result_<job_key>.<result_format> and is created in exclusive
    mode, so an existing file with the same name is never overwritten.

    :param job_key: Key of the job, used in the log message and the result file name
    :param job_state: State of the job, supplies the result spec and the job result to write
    :raises FileExistsError: If the result file already exists
    """

    self._log.info(f"Saving job result for [{job_key}]")

    # It might be better to abstract reporting of results, job status etc., perhaps with a job monitor

    if not job_state.result_spec.save_result:
        return

    result_format = job_state.result_spec.result_format
    result_dir = job_state.result_spec.result_dir
    result_file = f"job_result_{job_key}.{result_format}"
    result_path = pathlib.Path(result_dir).joinpath(result_file)

    # Serialize before creating the file: with exclusive-create mode, a failure while quoting
    # would otherwise leave an empty file behind that makes any later attempt fail
    result_content = _cfg_p.ConfigQuoter.quote(job_state.job_result, result_format)

    # Use an explicit encoding so the file content does not depend on the platform default
    with open(result_path, "xt", encoding="utf-8") as result_stream:
        result_stream.write(result_content)
|
391
|
+
|
271
392
|
def _get_job_info(self, job_key: str, details: bool = False) -> tp.Optional[_cfg.JobResult]:
|
272
393
|
|
273
394
|
job_state = self._jobs.get(job_key)
|
@@ -336,19 +457,25 @@ class JobProcessor(_actors.Actor):
|
|
336
457
|
"""
|
337
458
|
|
338
459
|
def __init__(
        self, sys_config: _cfg.RuntimeConfig,
        models: _models.ModelLoader, storage: _storage.StorageManager, log_provider: _logging.LogProvider,
        job_key: str, job_config: tp.Optional[_cfg.JobConfig], graph_spec: tp.Optional[_graph.Graph]):

    """
    Set up a processor for one job

    :param sys_config: Runtime config, handed to the graph builder when a graph must be built
    :param models: Model loader, passed to the function resolver
    :param storage: Storage manager, passed to the function resolver
    :param log_provider: Provider of the loggers used by this processor and the actors it spawns
    :param job_key: Key of the job being processed
    :param job_config: Job config to build a graph from, may be None if graph_spec is supplied
    :param graph_spec: Pre-built graph, may be None if job_config is supplied
    :raises EUnexpected: If neither job_config nor graph_spec is supplied
    """

    super().__init__()

    # Either a job config or a pre-built spec is required
    if not (job_config or graph_spec):
        raise _ex.EUnexpected()

    # What to run
    self.job_key = job_key
    self.job_config = job_config
    self.graph_spec = graph_spec

    # Shared resources supplied by the engine
    self._sys_config = sys_config
    self._models = models
    self._storage = storage
    self._log_provider = log_provider

    # Helpers built from those resources
    self._resolver = _func.FunctionResolver(models, storage, log_provider)
    self._log = log_provider.logger_for_object(self)
|
352
479
|
|
353
480
|
def on_start(self):
|
354
481
|
|
@@ -358,7 +485,7 @@ class JobProcessor(_actors.Actor):
|
|
358
485
|
if self.graph_spec is not None:
|
359
486
|
self.actors().send(self.actors().id, "build_graph_succeeded", self.graph_spec)
|
360
487
|
else:
|
361
|
-
self.actors().spawn(GraphBuilder(self.job_config, self.
|
488
|
+
self.actors().spawn(GraphBuilder(self._sys_config, self.job_config, self._log_provider))
|
362
489
|
|
363
490
|
def on_stop(self):
|
364
491
|
|
@@ -396,14 +523,14 @@ class JobProcessor(_actors.Actor):
|
|
396
523
|
# Add all the nodes as pending nodes to start
|
397
524
|
graph.pending_nodes.update(graph.nodes.keys())
|
398
525
|
|
399
|
-
self.actors().spawn(FunctionResolver(self._resolver, graph))
|
526
|
+
self.actors().spawn(FunctionResolver(self._resolver, self._log_provider, graph))
|
400
527
|
if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
|
401
528
|
self.actors().stop(self.actors().sender)
|
402
529
|
|
403
530
|
@_actors.Message
|
404
531
|
def resolve_functions_succeeded(self, graph: _EngineContext):
|
405
532
|
|
406
|
-
self.actors().spawn(GraphProcessor(graph, self._resolver))
|
533
|
+
self.actors().spawn(GraphProcessor(graph, self._resolver, self._log_provider))
|
407
534
|
if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
|
408
535
|
self.actors().stop(self.actors().sender)
|
409
536
|
|
@@ -426,11 +553,12 @@ class GraphBuilder(_actors.Actor):
|
|
426
553
|
GraphBuilder is a worker (actor) to wrap the GraphBuilder logic from graph_builder.py
|
427
554
|
"""
|
428
555
|
|
429
|
-
def __init__(self, job_config: _cfg.JobConfig,
|
556
|
+
def __init__(self, sys_config: _cfg.RuntimeConfig, job_config: _cfg.JobConfig, log_provider: _logging.LogProvider):

    """
    Set up a graph builder actor

    :param sys_config: Runtime config, passed on to the graph builder logic
    :param job_config: Config of the job to build a graph for
    :param log_provider: Provider of the logger for this actor
    """

    super().__init__()

    self._log = log_provider.logger_for_object(self)

    self.job_config = job_config
    self.sys_config = sys_config
|
434
562
|
|
435
563
|
def on_start(self):
|
436
564
|
self.build_graph(self, self.job_config)
|
@@ -440,8 +568,7 @@ class GraphBuilder(_actors.Actor):
|
|
440
568
|
|
441
569
|
self._log.info("Building execution graph")
|
442
570
|
|
443
|
-
|
444
|
-
graph_builder = _graph.GraphBuilder(job_config, self.result_spec)
|
571
|
+
graph_builder = _graph.GraphBuilder(self.sys_config, job_config)
|
445
572
|
graph_spec = graph_builder.build_job(job_config.job)
|
446
573
|
|
447
574
|
self.actors().reply("build_graph_succeeded", graph_spec)
|
@@ -450,14 +577,14 @@ class GraphBuilder(_actors.Actor):
|
|
450
577
|
class FunctionResolver(_actors.Actor):
|
451
578
|
|
452
579
|
"""
|
453
|
-
|
580
|
+
FunctionResolver is a worker (actor) to wrap the FunctionResolver logic in functions.py
|
454
581
|
"""
|
455
582
|
|
456
|
-
def __init__(self, resolver: _func.FunctionResolver, graph: _EngineContext):
|
583
|
+
def __init__(self, resolver: _func.FunctionResolver, log_provider: _logging.LogProvider, graph: _EngineContext):

    """
    Set up a function resolver actor

    :param resolver: Resolver logic that this actor wraps
    :param log_provider: Provider of the logger for this actor
    :param graph: Engine context holding the graph to resolve functions for
    """

    super().__init__()

    self._log = log_provider.logger_for_object(self)
    self._resolver = resolver
    self.graph = graph
|
461
588
|
|
462
589
|
def on_start(self):
|
463
590
|
self.resolve_functions(self, self.graph)
|
@@ -486,13 +613,15 @@ class GraphProcessor(_actors.Actor):
|
|
486
613
|
Once all running nodes are stopped, an error is reported to the parent
|
487
614
|
"""
|
488
615
|
|
489
|
-
def __init__(self, graph: _EngineContext, resolver: _func.FunctionResolver):
|
616
|
+
def __init__(self, graph: _EngineContext, resolver: _func.FunctionResolver, log_provider: _logging.LogProvider):

    """
    Set up a graph processor actor

    :param graph: Engine context holding the graph to process
    :param resolver: Function resolver, used to resolve nodes that are added to the graph
    :param log_provider: Provider of the loggers for this actor, its graph logger and its node logger
    """

    super().__init__()

    # Graph state and the node processors spawned for it
    self.graph = graph
    self.root_id_ = graph.root_id
    self.processors: tp.Dict[NodeId, _actors.ActorId] = {}

    self._resolver = resolver

    # All logging for the graph goes through the supplied log provider
    self._log = log_provider.logger_for_object(self)
    self._graph_logger = GraphLogger(log_provider)
    self._node_logger = NodeLogger(log_provider)
|
496
625
|
|
497
626
|
def on_start(self):
|
498
627
|
|
@@ -513,7 +642,7 @@ class GraphProcessor(_actors.Actor):
|
|
513
642
|
for node_id, node in graph.nodes.items():
|
514
643
|
if node_id in graph.succeeded_nodes and not self._is_required_node(node, graph):
|
515
644
|
node = processed_graph.nodes.pop(node_id)
|
516
|
-
|
645
|
+
self._node_logger.log_node_evict(node)
|
517
646
|
del node
|
518
647
|
|
519
648
|
pending_nodes = cp.copy(graph.pending_nodes)
|
@@ -538,13 +667,13 @@ class GraphProcessor(_actors.Actor):
|
|
538
667
|
# There is scope for a much more sophisticated approach, with prioritized scheduling
|
539
668
|
|
540
669
|
if isinstance(node.node, _graph.ChildJobNode):
|
541
|
-
processor = ChildJobNodeProcessor(processed_graph, node)
|
670
|
+
processor = ChildJobNodeProcessor(processed_graph, node, self._node_logger)
|
542
671
|
elif isinstance(node.node, _graph.RunModelNode) or isinstance(node.node, _graph.ImportModelNode):
|
543
|
-
processor = ModelNodeProcessor(processed_graph, node)
|
672
|
+
processor = ModelNodeProcessor(processed_graph, node, self._node_logger)
|
544
673
|
elif isinstance(node.node, _graph.LoadDataNode) or isinstance(node.node, _graph.SaveDataNode):
|
545
|
-
processor = DataNodeProcessor(processed_graph, node)
|
674
|
+
processor = DataNodeProcessor(processed_graph, node, self._node_logger)
|
546
675
|
else:
|
547
|
-
processor = NodeProcessor(processed_graph, node)
|
676
|
+
processor = NodeProcessor(processed_graph, node, self._node_logger)
|
548
677
|
|
549
678
|
# New nodes can be launched with the updated graph
|
550
679
|
# Anything that was pruned is not needed by the new node
|
@@ -612,7 +741,7 @@ class GraphProcessor(_actors.Actor):
|
|
612
741
|
new_graph.pending_nodes = cp.copy(new_graph.pending_nodes)
|
613
742
|
|
614
743
|
for node_id, node in new_nodes.items():
|
615
|
-
|
744
|
+
self._graph_logger.log_node_add(node)
|
616
745
|
node_func = self._resolver.resolve_node(node)
|
617
746
|
new_node = _EngineNode(node, node_func)
|
618
747
|
new_graph.nodes[node_id] = new_node
|
@@ -622,7 +751,7 @@ class GraphProcessor(_actors.Actor):
|
|
622
751
|
engine_node = cp.copy(new_graph.nodes[node_id])
|
623
752
|
engine_node.dependencies = cp.copy(engine_node.dependencies)
|
624
753
|
for dep in deps:
|
625
|
-
|
754
|
+
self._graph_logger.log_dependency_add(node_id, dep.node_id)
|
626
755
|
engine_node.dependencies[dep.node_id] = dep.dependency_type
|
627
756
|
new_graph.nodes[node_id] = engine_node
|
628
757
|
|
@@ -782,11 +911,12 @@ class NodeProcessor(_actors.Actor):
|
|
782
911
|
|
783
912
|
__NONE_TYPE = type(None)
|
784
913
|
|
785
|
-
def __init__(self, graph: _EngineContext, node: _EngineNode):
|
914
|
+
def __init__(self, graph: _EngineContext, node: _EngineNode, node_logger: "NodeLogger"):

    """
    Set up a processor for a single node

    :param graph: Engine context the node belongs to
    :param node: Engine node to process
    :param node_logger: Logger used to report the start, success and failure of the node
    """

    super().__init__()

    self.node_logger = node_logger
    self.graph = graph
    self.node = node
    # Keep the node ID at hand, it is sent with every message to the parent
    self.node_id = node.node.id
|
790
920
|
|
791
921
|
|
792
922
|
def on_start(self):
|
@@ -821,7 +951,7 @@ class NodeProcessor(_actors.Actor):
|
|
821
951
|
|
822
952
|
try:
|
823
953
|
|
824
|
-
|
954
|
+
self.node_logger.log_node_start(self.node)
|
825
955
|
|
826
956
|
# Context contains only node states available when the context is set up
|
827
957
|
ctx = NodeContextImpl(self.graph.nodes)
|
@@ -834,13 +964,13 @@ class NodeProcessor(_actors.Actor):
|
|
834
964
|
|
835
965
|
self._check_result_type(result)
|
836
966
|
|
837
|
-
|
967
|
+
self.node_logger.log_node_succeeded(self.node)
|
838
968
|
|
839
969
|
self.actors().send_parent("node_succeeded", self.node_id, result)
|
840
970
|
|
841
971
|
except Exception as e:
|
842
972
|
|
843
|
-
|
973
|
+
self.node_logger.log_node_failed(self.node, e)
|
844
974
|
|
845
975
|
self.actors().send_parent("node_failed", self.node_id, e)
|
846
976
|
|
@@ -896,28 +1026,29 @@ class NodeProcessor(_actors.Actor):
|
|
896
1026
|
|
897
1027
|
class ModelNodeProcessor(NodeProcessor):

    """
    Node processor that GraphProcessor creates for RunModelNode and ImportModelNode

    It adds no behavior of its own here, construction is delegated to NodeProcessor.
    """

    def __init__(self, graph: _EngineContext, node: _EngineNode, node_logger: "NodeLogger"):
        super().__init__(graph=graph, node=node, node_logger=node_logger)
|
901
1031
|
|
902
1032
|
|
903
1033
|
class DataNodeProcessor(NodeProcessor):

    """
    Node processor that GraphProcessor creates for LoadDataNode and SaveDataNode

    It adds no behavior of its own here, construction is delegated to NodeProcessor.
    """

    def __init__(self, graph: _EngineContext, node: _EngineNode, node_logger: "NodeLogger"):
        super().__init__(graph=graph, node=node, node_logger=node_logger)
|
907
1037
|
|
908
1038
|
|
909
1039
|
class ChildJobNodeProcessor(NodeProcessor):
|
910
1040
|
|
911
|
-
def __init__(self, graph: _EngineContext, node: _EngineNode):
|
912
|
-
super().__init__(graph, node)
|
1041
|
+
def __init__(self, graph: _EngineContext, node: _EngineNode, node_logger: "NodeLogger"):
|
1042
|
+
super().__init__(graph, node, node_logger)
|
913
1043
|
|
914
1044
|
@_actors.Message
|
915
1045
|
def evaluate_node(self):
|
916
1046
|
|
917
|
-
|
1047
|
+
self.node_logger.log_node_start(self.node)
|
918
1048
|
|
919
1049
|
job_id = self.node.node.job_id # noqa
|
920
1050
|
job_key = _util.object_key(job_id)
|
1051
|
+
parent_key = self.graph.job_key
|
921
1052
|
|
922
1053
|
node_id = self.actors().id
|
923
1054
|
|
@@ -932,21 +1063,21 @@ class ChildJobNodeProcessor(NodeProcessor):
|
|
932
1063
|
|
933
1064
|
graph_spec: _graph.Graph = self.node.node.graph # noqa
|
934
1065
|
|
935
|
-
self.actors().send(self.graph.engine_id, "submit_child_job", job_id, graph_spec, monitor_id)
|
1066
|
+
self.actors().send(self.graph.engine_id, "submit_child_job", parent_key, job_id, graph_spec, monitor_id)
|
936
1067
|
|
937
1068
|
@_actors.Message
|
938
1069
|
def child_job_succeeded(self, job_result: _cfg.JobResult):
|
939
1070
|
|
940
1071
|
self._check_result_type(job_result)
|
941
1072
|
|
942
|
-
|
1073
|
+
self.node_logger.log_node_succeeded(self.node)
|
943
1074
|
|
944
1075
|
self.actors().send_parent("node_succeeded", self.node_id, job_result)
|
945
1076
|
|
946
1077
|
@_actors.Message
|
947
1078
|
def child_job_failed(self, job_error: Exception):
|
948
1079
|
|
949
|
-
|
1080
|
+
self.node_logger.log_node_failed(self.node, job_error)
|
950
1081
|
|
951
1082
|
self.actors().send_parent("node_failed", self.node_id, job_error)
|
952
1083
|
|
@@ -957,23 +1088,22 @@ class GraphLogger:
|
|
957
1088
|
Log the activity of the GraphProcessor
|
958
1089
|
"""
|
959
1090
|
|
960
|
-
|
1091
|
+
def __init__(self, log_provider: _logging.LogProvider):

    """
    Create a graph logger that writes to the logger of the GraphProcessor class

    :param log_provider: Provider the logger is obtained from
    """

    graph_processor_log = log_provider.logger_for_class(GraphProcessor)
    self._log = graph_processor_log
|
961
1093
|
|
962
|
-
|
963
|
-
def log_node_add(cls, node: _graph.Node):
|
1094
|
+
def log_node_add(self, node: _graph.Node):

    """
    Log that a node has been added to the graph

    :param node: The node that was added
    """

    ident = node.id
    name, namespace = ident.name, ident.namespace

    message = f"ADD {self._func_type(node)} [{name}] / {namespace}"
    self._log.info(message)
|
969
1100
|
|
970
|
-
|
971
|
-
def log_dependency_add(cls, node_id: NodeId, dep_id: NodeId):
|
1101
|
+
def log_dependency_add(self, node_id: NodeId, dep_id: NodeId):

    """
    Log that a dependency has been added between two nodes

    The namespace is written once when both nodes share it, otherwise once per node.

    :param node_id: ID of the node that receives the dependency
    :param dep_id: ID of the node it now depends on
    """

    same_namespace = node_id.namespace == dep_id.namespace

    if same_namespace:
        message = f"ADD DEPENDENCY [{node_id.name}] -> [{dep_id.name}] / {node_id.namespace}"
    else:
        message = f"ADD DEPENDENCY [{node_id.name}] / {node_id.namespace} -> [{dep_id.name}] / {dep_id.namespace}"

    self._log.info(message)
|
977
1107
|
|
978
1108
|
@classmethod
|
979
1109
|
def _func_type(cls, node: _graph.Node):
|
@@ -990,7 +1120,8 @@ class NodeLogger:
|
|
990
1120
|
|
991
1121
|
# Separate out the logic for logging nodes, so the NodeProcessor itself stays a bit cleaner
|
992
1122
|
|
993
|
-
|
1123
|
+
def __init__(self, log_provider: _logging.LogProvider):

    """
    Create a node logger that writes to the logger of the NodeProcessor class

    :param log_provider: Provider the logger is obtained from
    """

    node_processor_log = log_provider.logger_for_class(NodeProcessor)
    self._log = node_processor_log
|
994
1125
|
|
995
1126
|
class LoggingType(enum.Enum):
|
996
1127
|
DEFAULT = 0
|
@@ -999,81 +1130,75 @@ class NodeLogger:
|
|
999
1130
|
SIMPLE_MAPPING = 3
|
1000
1131
|
MODEL = 4
|
1001
1132
|
|
1002
|
-
|
1003
|
-
def log_node_start(cls, node: _EngineNode):
|
1133
|
+
def log_node_start(self, node: _EngineNode):

    """
    Log the start of a node, the wording depends on the logging type of the node

    Static values are logged as SET, simple mappings as MAP and everything else as START.

    :param node: The engine node that is starting
    """

    kind = self._logging_type(node)
    ident = node.node.id
    node_name, namespace = ident.name, ident.namespace

    if kind == self.LoggingType.STATIC_VALUE:
        message = f"SET {self._value_type(node)} [{node_name}] / {namespace}"
    elif kind == self.LoggingType.SIMPLE_MAPPING:
        message = f"MAP {self._value_type(node)} [{self._mapping_source(node)}] -> [{node_name}] / {namespace}"
    else:
        message = f"START {self._func_type(node)} [{node_name}] / {namespace}"

    self._log.info(message)
|
1017
1147
|
|
1018
|
-
|
1019
|
-
def log_node_succeeded(cls, node: _EngineNode):
|
1148
|
+
def log_node_succeeded(self, node: _EngineNode):

    """
    Log the successful completion of a node

    Nothing is logged for static values and simple mappings. Push / pop nodes
    and model nodes log extra details before the DONE message.

    :param node: The engine node that succeeded
    """

    kind = self._logging_type(node)
    ident = node.node.id
    node_name, namespace = ident.name, ident.namespace

    silent_kinds = (self.LoggingType.STATIC_VALUE, self.LoggingType.SIMPLE_MAPPING)

    if kind in silent_kinds:
        return

    if kind == self.LoggingType.PUSH_POP:
        self._log_push_pop_node_details(node.node)  # noqa
    elif kind == self.LoggingType.MODEL:
        self._log_model_node_details(node.node)  # noqa

    message = f"DONE {self._func_type(node)} [{node_name}] / {namespace}"
    self._log.info(message)
|
1035
1164
|
|
1036
|
-
|
1037
|
-
def log_node_failed(cls, node: _EngineNode, e: Exception):
|
1165
|
+
def log_node_failed(self, node: _EngineNode, e: Exception):

    """
    Log the failure of a node, followed by the error that caused it

    :param node: The engine node that failed
    :param e: The error reported for the node
    """

    ident = node.node.id
    node_name, namespace = ident.name, ident.namespace

    message = f"FAILED {self._func_type(node)} [{node_name}] / {namespace}"

    self._log.error(message)
    self._log.exception(e)
|
1044
1172
|
|
1045
|
-
|
1046
|
-
def log_node_evict(cls, node: _EngineNode):
|
1173
|
+
def log_node_evict(self, node: _EngineNode):

    """
    Log that a node has been evicted from the graph

    Nothing is logged for static values and simple mappings.

    :param node: The engine node that was evicted
    """

    kind = self._logging_type(node)
    ident = node.node.id
    node_name, namespace = ident.name, ident.namespace

    silent_kinds = (self.LoggingType.STATIC_VALUE, self.LoggingType.SIMPLE_MAPPING)

    if kind in silent_kinds:
        return

    message = f"EVICT {self._func_type(node)} [{node_name}] / {namespace}"
    self._log.info(message)
|
1056
1183
|
|
1057
|
-
|
1058
|
-
def _log_push_pop_node_details(cls, node: tp.Union[_graph.ContextPushNode, _graph.ContextPopNode]):
|
1184
|
+
def _log_push_pop_node_details(self, node: tp.Union[_graph.ContextPushNode, _graph.ContextPopNode]):

    """
    Log one line for every item mapped by a context push or pop node

    :param node: The push or pop node, its mapping is iterated as (inner ID, outer ID) pairs
    """

    # Anything that is not a push node is reported as a pop
    if isinstance(node, _graph.ContextPushNode):
        verb, arrow = "PUSH", "->"
    else:
        verb, arrow = "POP", "<-"

    for inner_id, outer_id in node.mapping.items():
        type_name = self._type_str(inner_id.result_type)
        self._log.info(f"{verb} {type_name} [{outer_id.name}] {arrow} [{inner_id.name}] / {node.id.namespace}")
|
1067
1193
|
|
1068
|
-
|
1069
|
-
def _log_model_node_details(cls, node: _graph.RunModelNode):
|
1194
|
+
def _log_model_node_details(self, node: _graph.RunModelNode):

    """
    Log one RESULT line for every output declared by the model of a run model node

    :param node: The run model node, outputs are read from its model definition
    """

    # Every output is reported with the same type string, so compute it once and reuse it
    # NOTE(review): assumes _type_str() has no side effects - confirm against its definition
    result_type = self._type_str(_data.DataView)

    for output in node.model_def.outputs:
        msg = f"RESULT {result_type} [{output}] / {node.bundle_namespace}"
        self._log.info(msg)
|
1077
1202
|
|
1078
1203
|
@classmethod
|
1079
1204
|
def _logging_type(cls, node: _EngineNode) -> LoggingType:
|