tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b2__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- tracdap/rt/_impl/core/config_parser.py +29 -3
- tracdap/rt/_impl/core/data.py +627 -40
- tracdap/rt/_impl/core/repos.py +17 -8
- tracdap/rt/_impl/core/storage.py +25 -13
- tracdap/rt/_impl/core/struct.py +254 -60
- tracdap/rt/_impl/core/util.py +125 -11
- tracdap/rt/_impl/exec/context.py +35 -8
- tracdap/rt/_impl/exec/dev_mode.py +169 -127
- tracdap/rt/_impl/exec/engine.py +203 -140
- tracdap/rt/_impl/exec/functions.py +228 -263
- tracdap/rt/_impl/exec/graph.py +141 -126
- tracdap/rt/_impl/exec/graph_builder.py +428 -449
- tracdap/rt/_impl/grpc/codec.py +8 -13
- tracdap/rt/_impl/grpc/server.py +7 -7
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +37 -35
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +23 -19
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
- tracdap/rt/_impl/runtime.py +3 -9
- tracdap/rt/_impl/static_api.py +5 -6
- tracdap/rt/_plugins/format_csv.py +2 -2
- tracdap/rt/_plugins/repo_git.py +56 -11
- tracdap/rt/_plugins/storage_aws.py +165 -150
- tracdap/rt/_plugins/storage_azure.py +17 -11
- tracdap/rt/_plugins/storage_gcp.py +35 -18
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/model_api.py +45 -0
- tracdap/rt/config/__init__.py +7 -9
- tracdap/rt/config/common.py +3 -14
- tracdap/rt/config/job.py +17 -3
- tracdap/rt/config/platform.py +9 -32
- tracdap/rt/config/result.py +8 -4
- tracdap/rt/config/runtime.py +5 -10
- tracdap/rt/config/tenant.py +28 -0
- tracdap/rt/launch/cli.py +0 -8
- tracdap/rt/launch/launch.py +1 -3
- tracdap/rt/metadata/__init__.py +35 -35
- tracdap/rt/metadata/data.py +19 -31
- tracdap/rt/metadata/job.py +3 -1
- tracdap/rt/metadata/storage.py +9 -0
- tracdap/rt/metadata/type.py +9 -5
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +5 -3
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +64 -63
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
tracdap/rt/_impl/exec/engine.py
CHANGED
@@ -17,7 +17,6 @@ import copy as cp
|
|
17
17
|
import dataclasses as dc
|
18
18
|
import enum
|
19
19
|
import io
|
20
|
-
import pathlib
|
21
20
|
import typing as tp
|
22
21
|
|
23
22
|
import tracdap.rt.metadata as _meta
|
@@ -84,43 +83,41 @@ class _EngineContext:
|
|
84
83
|
|
85
84
|
|
86
85
|
@dc.dataclass
|
87
|
-
class
|
86
|
+
class _JobLog:
|
88
87
|
|
89
|
-
|
90
|
-
|
91
|
-
|
88
|
+
log_init: "dc.InitVar[tp.Optional[_JobLog]]" = None
|
89
|
+
|
90
|
+
log_file_needed: bool = True
|
91
|
+
|
92
|
+
log_buffer: io.BytesIO = None
|
93
|
+
log_provider: _logging.LogProvider = None
|
94
|
+
|
95
|
+
def __post_init__(self, log_init):
|
96
|
+
|
97
|
+
if log_init is not None:
|
98
|
+
self.log_provider = log_init.log_provider
|
99
|
+
self.log_file_needed = False
|
100
|
+
elif self.log_file_needed:
|
101
|
+
self.log_buffer = io.BytesIO()
|
102
|
+
self.log_provider = _logging.job_log_provider(self.log_buffer)
|
103
|
+
else:
|
104
|
+
self.log_provider = _logging.LogProvider()
|
92
105
|
|
93
106
|
|
94
107
|
@dc.dataclass
|
95
108
|
class _JobState:
|
96
109
|
|
97
110
|
job_id: _meta.TagHeader
|
98
|
-
|
111
|
+
job_config: _cfg.JobConfig = None
|
112
|
+
parent_key: str = None
|
99
113
|
|
100
114
|
actor_id: _actors.ActorId = None
|
101
115
|
monitors: tp.List[_actors.ActorId] = dc.field(default_factory=list)
|
116
|
+
job_log: _JobLog = None
|
102
117
|
|
103
|
-
job_config: _cfg.JobConfig = None
|
104
118
|
job_result: _cfg.JobResult = None
|
105
119
|
job_error: Exception = None
|
106
120
|
|
107
|
-
parent_key: str = None
|
108
|
-
result_spec: _JobResultSpec = None
|
109
|
-
|
110
|
-
log_buffer: io.BytesIO = None
|
111
|
-
log_provider: _logging.LogProvider = None
|
112
|
-
log: _logging.Logger = None
|
113
|
-
|
114
|
-
def __post_init__(self, log_init):
|
115
|
-
|
116
|
-
if isinstance(self.log, _logging.LogProvider):
|
117
|
-
self.log_provider = log_init
|
118
|
-
else:
|
119
|
-
self.log_buffer = io.BytesIO()
|
120
|
-
self.log_provider = _logging.job_log_provider(self.log_buffer)
|
121
|
-
|
122
|
-
self.log = self.log_provider.logger_for_class(TracEngine)
|
123
|
-
|
124
121
|
|
125
122
|
class TracEngine(_actors.Actor):
|
126
123
|
|
@@ -188,22 +185,25 @@ class TracEngine(_actors.Actor):
|
|
188
185
|
return super().on_signal(signal)
|
189
186
|
|
190
187
|
@_actors.Message
|
191
|
-
def submit_job(
|
192
|
-
self, job_config: _cfg.JobConfig,
|
193
|
-
job_result_dir: str,
|
194
|
-
job_result_format: str):
|
188
|
+
def submit_job(self, job_config: _cfg.JobConfig):
|
195
189
|
|
196
190
|
job_key = _util.object_key(job_config.jobId)
|
191
|
+
|
192
|
+
self._log.info(f"Received a new job: [{job_key}]")
|
193
|
+
|
197
194
|
job_state = _JobState(job_config.jobId)
|
195
|
+
job_state.job_config = job_config
|
198
196
|
|
199
|
-
|
197
|
+
job_logs_enabled = _util.read_property(
|
198
|
+
job_config.properties,
|
199
|
+
_cfg_p.ConfigKeys.RESULT_LOGS_ENABLED,
|
200
|
+
False, bool)
|
200
201
|
|
201
|
-
|
202
|
-
result_spec = _JobResultSpec(result_needed, job_result_dir, job_result_format)
|
202
|
+
job_log = _JobLog(log_file_needed=job_logs_enabled)
|
203
203
|
|
204
204
|
job_processor = JobProcessor(
|
205
|
-
self._sys_config, self._models, self._storage,
|
206
|
-
job_key, job_config, graph_spec=None)
|
205
|
+
self._sys_config, self._models, self._storage,
|
206
|
+
job_key, job_config, graph_spec=None, job_log=job_log)
|
207
207
|
|
208
208
|
job_actor_id = self.actors().spawn(job_processor)
|
209
209
|
|
@@ -214,8 +214,7 @@ class TracEngine(_actors.Actor):
|
|
214
214
|
|
215
215
|
job_state.actor_id = job_actor_id
|
216
216
|
job_state.monitors.append(job_monitor_id)
|
217
|
-
job_state.
|
218
|
-
job_state.result_spec = result_spec
|
217
|
+
job_state.job_log = job_log
|
219
218
|
|
220
219
|
self._jobs[job_key] = job_state
|
221
220
|
|
@@ -231,17 +230,26 @@ class TracEngine(_actors.Actor):
|
|
231
230
|
|
232
231
|
child_key = _util.object_key(child_id)
|
233
232
|
|
233
|
+
self._log.info(f"Received a child job: [{child_key}] for parent [{parent_key}]")
|
234
|
+
|
235
|
+
# Copy job config properties from parent job
|
236
|
+
child_config = _cfg.JobConfig()
|
237
|
+
child_config.properties.update(parent_state.job_config.properties)
|
238
|
+
|
239
|
+
child_job_log = _JobLog(parent_state.job_log)
|
240
|
+
|
234
241
|
child_processor = JobProcessor(
|
235
|
-
self._sys_config, self._models, self._storage,
|
236
|
-
child_key, None, graph_spec=child_graph)
|
242
|
+
self._sys_config, self._models, self._storage,
|
243
|
+
child_key, None, graph_spec=child_graph, job_log=child_job_log)
|
237
244
|
|
238
245
|
child_actor_id = self.actors().spawn(child_processor)
|
239
246
|
|
240
|
-
child_state = _JobState(child_id
|
247
|
+
child_state = _JobState(child_id)
|
248
|
+
child_state.job_config = child_config
|
249
|
+
child_state.parent_key = parent_key
|
241
250
|
child_state.actor_id = child_actor_id
|
242
251
|
child_state.monitors.append(monitor_id)
|
243
|
-
child_state.
|
244
|
-
child_state.result_spec = _JobResultSpec(False) # Do not output separate results for child jobs
|
252
|
+
child_state.job_log = child_job_log
|
245
253
|
|
246
254
|
self._jobs[child_key] = child_state
|
247
255
|
|
@@ -265,9 +273,9 @@ class TracEngine(_actors.Actor):
|
|
265
273
|
self._log.warning(f"Ignoring [job_succeeded] message, job [{job_key}] has already completed")
|
266
274
|
return
|
267
275
|
|
268
|
-
|
269
|
-
job_state.log.info(f"Recording job as successful: {job_key}")
|
276
|
+
self._log.info(f"Marking job as successful: {job_key}")
|
270
277
|
|
278
|
+
job_state = self._jobs[job_key]
|
271
279
|
job_state.job_result = job_result
|
272
280
|
|
273
281
|
for monitor_id in job_state.monitors:
|
@@ -276,36 +284,30 @@ class TracEngine(_actors.Actor):
|
|
276
284
|
self._finalize_job(job_key)
|
277
285
|
|
278
286
|
@_actors.Message
|
279
|
-
def job_failed(self, job_key: str, error: Exception):
|
287
|
+
def job_failed(self, job_key: str, error: Exception, job_result: tp.Optional[_cfg.JobResult] = None):
|
280
288
|
|
281
289
|
# Ignore duplicate messages from the job processor (can happen in unusual error cases)
|
282
290
|
if job_key not in self._jobs:
|
283
291
|
self._log.warning(f"Ignoring [job_failed] message, job [{job_key}] has already completed")
|
284
292
|
return
|
285
293
|
|
286
|
-
|
287
|
-
job_state.log.error(f"Recording job as failed: {job_key}")
|
288
|
-
|
289
|
-
job_state.job_error = error
|
290
|
-
|
291
|
-
# Create a failed result so there is something to report
|
292
|
-
result_id = job_state.job_config.resultMapping.get("trac_job_result")
|
294
|
+
self._log.error(f"Marking job as failed: {job_key}")
|
293
295
|
|
294
|
-
|
295
|
-
|
296
|
-
job_state.job_result = _cfg.JobResult(
|
297
|
-
jobId=job_state.job_id,
|
298
|
-
statusCode=_meta.JobStatusCode.FAILED,
|
299
|
-
statusMessage=str(error))
|
296
|
+
job_state = self._jobs[job_key]
|
300
297
|
|
298
|
+
# Build a failed result if none is supplied by the job processor (should not normally happen)
|
299
|
+
# In this case, no job log will be included in the output
|
300
|
+
if job_result is None and job_state.job_config is not None:
|
301
|
+
job_id = job_state.job_id
|
302
|
+
result_id = job_state.job_config.resultId
|
301
303
|
result_def = _meta.ResultDefinition()
|
302
304
|
result_def.jobId = _util.selector_for(job_state.job_id)
|
303
305
|
result_def.statusCode = _meta.JobStatusCode.FAILED
|
306
|
+
result_def.statusMessage = str(error)
|
307
|
+
job_result = _cfg.JobResult(job_id, result_id, result_def)
|
304
308
|
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
job_state.job_result.results[result_key] = result_obj
|
309
|
+
job_state.job_result = job_result
|
310
|
+
job_state.job_error = error
|
309
311
|
|
310
312
|
for monitor_id in job_state.monitors:
|
311
313
|
self.actors().send(monitor_id, "job_failed", error)
|
@@ -321,12 +323,13 @@ class TracEngine(_actors.Actor):
|
|
321
323
|
|
322
324
|
job_state = self._jobs.get(job_key)
|
323
325
|
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
self._save_job_log_file(job_key, job_state)
|
326
|
+
result_enabled = _util.read_property(
|
327
|
+
job_state.job_config.properties,
|
328
|
+
_cfg_p.ConfigKeys.RESULT_ENABLED,
|
329
|
+
False, bool)
|
329
330
|
|
331
|
+
# Record output metadata if required
|
332
|
+
if result_enabled and job_state.parent_key is None:
|
330
333
|
self._save_job_result(job_key, job_state)
|
331
334
|
|
332
335
|
# Stop any monitors that were created directly by the engine
|
@@ -341,53 +344,17 @@ class TracEngine(_actors.Actor):
|
|
341
344
|
self.actors().stop(job_state.actor_id)
|
342
345
|
job_state.actor_id = None
|
343
346
|
|
344
|
-
def _save_job_log_file(self, job_key: str, job_state: _JobState):
|
345
|
-
|
346
|
-
self._log.info(f"Saving job log file for [{job_key}]")
|
347
|
-
|
348
|
-
# Saving log files could go into a separate actor, perhaps a job monitor along with _save_job_result()
|
349
|
-
|
350
|
-
file_id = job_state.job_config.resultMapping["trac_job_log_file"]
|
351
|
-
storage_id = job_state.job_config.resultMapping["trac_job_log_file:STORAGE"]
|
352
|
-
|
353
|
-
file_type = _meta.FileType("TXT", "text/plain")
|
354
|
-
file_def, storage_def = _graph.GraphBuilder.build_output_file_and_storage(
|
355
|
-
"trac_job_log_file", file_type,
|
356
|
-
self._sys_config, job_state.job_config)
|
357
|
-
|
358
|
-
storage_item = storage_def.dataItems[file_def.dataItem].incarnations[0].copies[0]
|
359
|
-
storage = self._storage.get_file_storage(storage_item.storageKey)
|
360
|
-
|
361
|
-
with storage.write_byte_stream(storage_item.storagePath) as stream:
|
362
|
-
stream.write(job_state.log_buffer.getbuffer())
|
363
|
-
file_def.size = stream.tell()
|
364
|
-
|
365
|
-
result_id = job_state.job_config.resultMapping["trac_job_result"]
|
366
|
-
result_def = job_state.job_result.results[_util.object_key(result_id)].result
|
367
|
-
result_def.logFileId = _util.selector_for(file_id)
|
368
|
-
|
369
|
-
file_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.FILE, file=file_def)
|
370
|
-
storage_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
|
371
|
-
|
372
|
-
job_state.job_result.results[_util.object_key(file_id)] = file_obj
|
373
|
-
job_state.job_result.results[_util.object_key(storage_id)] = storage_obj
|
374
|
-
|
375
347
|
def _save_job_result(self, job_key: str, job_state: _JobState):
|
376
348
|
|
377
349
|
self._log.info(f"Saving job result for [{job_key}]")
|
378
350
|
|
379
|
-
|
380
|
-
|
381
|
-
|
351
|
+
storage_key = _util.read_property(job_state.job_config.properties, _cfg_p.ConfigKeys.RESULT_STORAGE_LOCATION)
|
352
|
+
storage_path = _util.read_property(job_state.job_config.properties, _cfg_p.ConfigKeys.RESULT_STORAGE_PATH)
|
353
|
+
result_format = _util.read_property(job_state.job_config.properties, _cfg_p.ConfigKeys.RESULT_FORMAT, "JSON")
|
354
|
+
result_content = _cfg_p.ConfigQuoter.quote(job_state.job_result, result_format)
|
382
355
|
|
383
|
-
|
384
|
-
|
385
|
-
result_file = f"job_result_{job_key}.{result_format}"
|
386
|
-
result_path = pathlib.Path(result_dir).joinpath(result_file)
|
387
|
-
|
388
|
-
with open(result_path, "xt") as result_stream:
|
389
|
-
result_content = _cfg_p.ConfigQuoter.quote(job_state.job_result, result_format)
|
390
|
-
result_stream.write(result_content)
|
356
|
+
storage = self._storage.get_file_storage(storage_key)
|
357
|
+
storage.write_bytes(storage_path, result_content.encode('utf-8'))
|
391
358
|
|
392
359
|
def _get_job_info(self, job_key: str, details: bool = False) -> tp.Optional[_cfg.JobResult]:
|
393
360
|
|
@@ -398,24 +365,28 @@ class TracEngine(_actors.Actor):
|
|
398
365
|
|
399
366
|
job_result = _cfg.JobResult()
|
400
367
|
job_result.jobId = job_state.job_id
|
368
|
+
job_result.resultId = job_state.job_config.resultId
|
369
|
+
job_result.result = _meta.ResultDefinition()
|
370
|
+
job_result.result.jobId = _util.selector_for(job_state.job_id)
|
401
371
|
|
402
372
|
if job_state.actor_id is not None:
|
403
|
-
job_result.statusCode = _meta.JobStatusCode.RUNNING
|
373
|
+
job_result.result.statusCode = _meta.JobStatusCode.RUNNING
|
404
374
|
|
405
375
|
elif job_state.job_result is not None:
|
406
|
-
job_result.statusCode = job_state.job_result.statusCode
|
407
|
-
job_result.statusMessage = job_state.job_result.statusMessage
|
376
|
+
job_result.result.statusCode = job_state.job_result.result.statusCode
|
377
|
+
job_result.result.statusMessage = job_state.job_result.result.statusMessage
|
408
378
|
if details:
|
409
|
-
job_result.
|
379
|
+
job_result.objectIds = job_state.job_result.objectIds or list()
|
380
|
+
job_result.objects = job_state.job_result.objects or dict()
|
410
381
|
|
411
382
|
elif job_state.job_error is not None:
|
412
|
-
job_result.statusCode = _meta.JobStatusCode.FAILED
|
413
|
-
job_result.statusMessage = str(job_state.job_error.args[0])
|
383
|
+
job_result.result.statusCode = _meta.JobStatusCode.FAILED
|
384
|
+
job_result.result.statusMessage = str(job_state.job_error.args[0])
|
414
385
|
|
415
386
|
else:
|
416
387
|
# Alternatively return UNKNOWN status or throw an error here
|
417
|
-
job_result.statusCode = _meta.JobStatusCode.FAILED
|
418
|
-
job_result.statusMessage = "No details available"
|
388
|
+
job_result.result.statusCode = _meta.JobStatusCode.FAILED
|
389
|
+
job_result.result.statusMessage = "No details available"
|
419
390
|
|
420
391
|
return job_result
|
421
392
|
|
@@ -458,8 +429,9 @@ class JobProcessor(_actors.Actor):
|
|
458
429
|
|
459
430
|
def __init__(
|
460
431
|
self, sys_config: _cfg.RuntimeConfig,
|
461
|
-
models: _models.ModelLoader, storage: _storage.StorageManager,
|
462
|
-
job_key: str, job_config: tp.Optional[_cfg.JobConfig], graph_spec: tp.Optional[_graph.Graph]
|
432
|
+
models: _models.ModelLoader, storage: _storage.StorageManager,
|
433
|
+
job_key: str, job_config: tp.Optional[_cfg.JobConfig], graph_spec: tp.Optional[_graph.Graph],
|
434
|
+
job_log: tp.Optional[_JobLog] = None):
|
463
435
|
|
464
436
|
super().__init__()
|
465
437
|
|
@@ -473,9 +445,15 @@ class JobProcessor(_actors.Actor):
|
|
473
445
|
self._sys_config = sys_config
|
474
446
|
self._models = models
|
475
447
|
self._storage = storage
|
476
|
-
|
477
|
-
self.
|
478
|
-
self.
|
448
|
+
|
449
|
+
self._job_log = job_log if job_log is not None else _JobLog()
|
450
|
+
self._log_provider = self._job_log.log_provider
|
451
|
+
self._log = self._job_log.log_provider.logger_for_object(self)
|
452
|
+
|
453
|
+
self._log.info(f"New job created for [{self.job_key}]")
|
454
|
+
|
455
|
+
self._resolver = _func.FunctionResolver(models, storage, self._log_provider)
|
456
|
+
self._preallocated_ids: tp.Dict[_meta.ObjectType, tp.List[_meta.TagHeader]] = dict()
|
479
457
|
|
480
458
|
def on_start(self):
|
481
459
|
|
@@ -513,7 +491,10 @@ class JobProcessor(_actors.Actor):
|
|
513
491
|
return super().on_signal(signal)
|
514
492
|
|
515
493
|
@_actors.Message
|
516
|
-
def build_graph_succeeded(self, graph_spec: _graph.Graph):
|
494
|
+
def build_graph_succeeded(self, graph_spec: _graph.Graph, unallocated_ids = None):
|
495
|
+
|
496
|
+
# Save any unallocated IDs to use later (needed for saving the log file)
|
497
|
+
self._preallocated_ids = unallocated_ids or dict()
|
517
498
|
|
518
499
|
# Build a new engine context graph from the graph spec
|
519
500
|
engine_id = self.actors().parent
|
@@ -524,6 +505,7 @@ class JobProcessor(_actors.Actor):
|
|
524
505
|
graph.pending_nodes.update(graph.nodes.keys())
|
525
506
|
|
526
507
|
self.actors().spawn(FunctionResolver(self._resolver, self._log_provider, graph))
|
508
|
+
|
527
509
|
if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
|
528
510
|
self.actors().stop(self.actors().sender)
|
529
511
|
|
@@ -531,20 +513,103 @@ class JobProcessor(_actors.Actor):
|
|
531
513
|
def resolve_functions_succeeded(self, graph: _EngineContext):
|
532
514
|
|
533
515
|
self.actors().spawn(GraphProcessor(graph, self._resolver, self._log_provider))
|
516
|
+
|
534
517
|
if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
|
535
518
|
self.actors().stop(self.actors().sender)
|
536
519
|
|
537
520
|
@_actors.Message
|
538
521
|
def job_succeeded(self, job_result: _cfg.JobResult):
|
539
|
-
|
522
|
+
|
523
|
+
# This will be the last message in the job log file
|
524
|
+
self._log.info(f"Job succeeded [{self.job_key}]")
|
525
|
+
|
526
|
+
if self._job_log.log_file_needed:
|
527
|
+
self._save_job_log_file(job_result)
|
528
|
+
|
540
529
|
self.actors().stop(self.actors().sender)
|
541
530
|
self.actors().send_parent("job_succeeded", self.job_key, job_result)
|
542
531
|
|
543
532
|
@_actors.Message
|
544
533
|
def job_failed(self, error: Exception):
|
545
|
-
|
534
|
+
|
535
|
+
# This will be the last message in the job log file
|
536
|
+
self._log.error(f"Job failed [{self.job_key}]")
|
537
|
+
|
546
538
|
self.actors().stop(self.actors().sender)
|
547
|
-
|
539
|
+
|
540
|
+
# For top level jobs, build a failed job result and save the log file
|
541
|
+
if self.job_config is not None:
|
542
|
+
|
543
|
+
job_id = self.job_config.jobId
|
544
|
+
result_id = self.job_config.resultId
|
545
|
+
result_def = _meta.ResultDefinition()
|
546
|
+
result_def.jobId = _util.selector_for(job_id)
|
547
|
+
result_def.statusCode = _meta.JobStatusCode.FAILED
|
548
|
+
result_def.statusMessage = str(error)
|
549
|
+
job_result = _cfg.JobResult(job_id, result_id, result_def)
|
550
|
+
|
551
|
+
if self._job_log.log_file_needed:
|
552
|
+
self._save_job_log_file(job_result)
|
553
|
+
|
554
|
+
self.actors().send_parent("job_failed", self.job_key, error, job_result)
|
555
|
+
|
556
|
+
# For child jobs, just send the error response
|
557
|
+
# Result and log file will be handled in the top level job
|
558
|
+
else:
|
559
|
+
self.actors().send_parent("job_failed", self.job_key, error)
|
560
|
+
|
561
|
+
def _save_job_log_file(self, job_result: _cfg.JobResult):
|
562
|
+
|
563
|
+
# Do not fail the job if log content is not available
|
564
|
+
if self._job_log.log_buffer is None:
|
565
|
+
self._log.warning(f"Job log not available for [{self.job_key}]")
|
566
|
+
return
|
567
|
+
|
568
|
+
# Saving log files could go into a separate actor
|
569
|
+
|
570
|
+
file_id = self._allocate_id(_meta.ObjectType.FILE)
|
571
|
+
storage_id = self._allocate_id(_meta.ObjectType.STORAGE)
|
572
|
+
|
573
|
+
self._log.info(f"Saving job log [{_util.object_key(file_id)}]")
|
574
|
+
|
575
|
+
file_name = "trac_job_log_file"
|
576
|
+
file_type = _meta.FileType("TXT", "text/plain")
|
577
|
+
|
578
|
+
file_spec = _data.build_file_spec(
|
579
|
+
file_id, storage_id,
|
580
|
+
file_name, file_type,
|
581
|
+
self._sys_config)
|
582
|
+
|
583
|
+
file_def = file_spec.definition
|
584
|
+
storage_def = file_spec.storage
|
585
|
+
|
586
|
+
storage_item = storage_def.dataItems[file_def.dataItem].incarnations[0].copies[0]
|
587
|
+
storage = self._storage.get_file_storage(storage_item.storageKey)
|
588
|
+
|
589
|
+
with storage.write_byte_stream(storage_item.storagePath) as stream:
|
590
|
+
stream.write(self._job_log.log_buffer.getbuffer())
|
591
|
+
file_def.size = stream.tell()
|
592
|
+
|
593
|
+
result_def = job_result.result
|
594
|
+
result_def.logFileId = _util.selector_for(file_id)
|
595
|
+
|
596
|
+
file_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.FILE, file=file_def)
|
597
|
+
storage_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
|
598
|
+
|
599
|
+
job_result.objectIds.append(file_id)
|
600
|
+
job_result.objectIds.append(storage_id)
|
601
|
+
job_result.objects[_util.object_key(file_id)] = file_obj
|
602
|
+
job_result.objects[_util.object_key(storage_id)] = storage_obj
|
603
|
+
|
604
|
+
def _allocate_id(self, object_type: _meta.ObjectType):
|
605
|
+
|
606
|
+
preallocated_ids = self._preallocated_ids.get(object_type)
|
607
|
+
|
608
|
+
if preallocated_ids:
|
609
|
+
# Preallocated IDs have objectVersion = 0, use a new version to get objectVersion = 1
|
610
|
+
return _util.new_object_version(preallocated_ids.pop())
|
611
|
+
else:
|
612
|
+
return _util.new_object_id(object_type)
|
548
613
|
|
549
614
|
|
550
615
|
class GraphBuilder(_actors.Actor):
|
@@ -570,8 +635,9 @@ class GraphBuilder(_actors.Actor):
|
|
570
635
|
|
571
636
|
graph_builder = _graph.GraphBuilder(self.sys_config, job_config)
|
572
637
|
graph_spec = graph_builder.build_job(job_config.job)
|
638
|
+
unallocated_ids = graph_builder.unallocated_ids()
|
573
639
|
|
574
|
-
self.actors().reply("build_graph_succeeded", graph_spec)
|
640
|
+
self.actors().reply("build_graph_succeeded", graph_spec, unallocated_ids)
|
575
641
|
|
576
642
|
|
577
643
|
class FunctionResolver(_actors.Actor):
|
@@ -704,23 +770,21 @@ class GraphProcessor(_actors.Actor):
|
|
704
770
|
self.check_job_status(do_submit=False)
|
705
771
|
|
706
772
|
@_actors.Message
|
707
|
-
def update_graph(
|
708
|
-
self, requestor_id: NodeId,
|
709
|
-
new_nodes: tp.Dict[NodeId, _graph.Node],
|
710
|
-
new_deps: tp.Dict[NodeId, tp.List[_graph.Dependency]]):
|
773
|
+
def update_graph(self, requestor_id: NodeId, update: _graph.GraphUpdate):
|
711
774
|
|
712
775
|
new_graph = cp.copy(self.graph)
|
713
776
|
new_graph.nodes = cp.copy(new_graph.nodes)
|
714
777
|
|
715
778
|
# Attempt to insert a duplicate node is always an error
|
716
|
-
node_collision = list(filter(lambda nid: nid in self.graph.nodes,
|
779
|
+
node_collision = list(filter(lambda nid: nid in self.graph.nodes, update.nodes))
|
717
780
|
|
718
781
|
# Only allow adding deps to pending nodes for now (adding deps to active nodes will require more work)
|
719
|
-
dep_collision = list(filter(lambda nid: nid not in self.graph.pending_nodes,
|
782
|
+
dep_collision = list(filter(lambda nid: nid not in self.graph.pending_nodes, update.dependencies))
|
720
783
|
|
784
|
+
# Only allow adding deps to new nodes (deps to existing nodes should not be part of an update)
|
721
785
|
dep_invalid = list(filter(
|
722
|
-
lambda
|
723
|
-
|
786
|
+
lambda ds: any(filter(lambda d: d.node_id not in update.nodes, ds)),
|
787
|
+
update.dependencies.values()))
|
724
788
|
|
725
789
|
if any(node_collision) or any(dep_collision) or any(dep_invalid):
|
726
790
|
|
@@ -736,18 +800,20 @@ class GraphProcessor(_actors.Actor):
|
|
736
800
|
requestor.error = _ex.ETracInternal("Node collision during graph update")
|
737
801
|
new_graph.nodes[requestor_id] = requestor
|
738
802
|
|
803
|
+
self.graph = new_graph
|
804
|
+
|
739
805
|
return
|
740
806
|
|
741
807
|
new_graph.pending_nodes = cp.copy(new_graph.pending_nodes)
|
742
808
|
|
743
|
-
for node_id, node in
|
809
|
+
for node_id, node in update.nodes.items():
|
744
810
|
self._graph_logger.log_node_add(node)
|
745
811
|
node_func = self._resolver.resolve_node(node)
|
746
812
|
new_node = _EngineNode(node, node_func)
|
747
813
|
new_graph.nodes[node_id] = new_node
|
748
814
|
new_graph.pending_nodes.add(node_id)
|
749
815
|
|
750
|
-
for node_id, deps in
|
816
|
+
for node_id, deps in update.dependencies.items():
|
751
817
|
engine_node = cp.copy(new_graph.nodes[node_id])
|
752
818
|
engine_node.dependencies = cp.copy(engine_node.dependencies)
|
753
819
|
for dep in deps:
|
@@ -1302,8 +1368,5 @@ class NodeCallbackImpl(_func.NodeCallback):
|
|
1302
1368
|
self.__actor_ctx = actor_ctx
|
1303
1369
|
self.__node_id = node_id
|
1304
1370
|
|
1305
|
-
def
|
1306
|
-
|
1307
|
-
new_deps: tp.Dict[NodeId, tp.List[_graph.Dependency]]):
|
1308
|
-
|
1309
|
-
self.__actor_ctx.send_parent("update_graph", self.__node_id, new_nodes, new_deps)
|
1371
|
+
def send_graph_update(self, update: _graph.GraphUpdate):
|
1372
|
+
self.__actor_ctx.send_parent("update_graph", self.__node_id, update)
|