tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (64)
  1. tracdap/rt/_impl/core/config_parser.py +29 -3
  2. tracdap/rt/_impl/core/data.py +627 -40
  3. tracdap/rt/_impl/core/repos.py +17 -8
  4. tracdap/rt/_impl/core/storage.py +25 -13
  5. tracdap/rt/_impl/core/struct.py +254 -60
  6. tracdap/rt/_impl/core/util.py +125 -11
  7. tracdap/rt/_impl/exec/context.py +35 -8
  8. tracdap/rt/_impl/exec/dev_mode.py +169 -127
  9. tracdap/rt/_impl/exec/engine.py +203 -140
  10. tracdap/rt/_impl/exec/functions.py +228 -263
  11. tracdap/rt/_impl/exec/graph.py +141 -126
  12. tracdap/rt/_impl/exec/graph_builder.py +428 -449
  13. tracdap/rt/_impl/grpc/codec.py +8 -13
  14. tracdap/rt/_impl/grpc/server.py +7 -7
  15. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
  16. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
  17. tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
  18. tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
  19. tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
  20. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +37 -35
  21. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
  22. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
  23. tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
  24. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
  25. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
  26. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
  27. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
  28. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
  29. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
  30. tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
  31. tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
  32. tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
  33. tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
  34. tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
  35. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +23 -19
  36. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
  37. tracdap/rt/_impl/runtime.py +3 -9
  38. tracdap/rt/_impl/static_api.py +5 -6
  39. tracdap/rt/_plugins/format_csv.py +2 -2
  40. tracdap/rt/_plugins/repo_git.py +56 -11
  41. tracdap/rt/_plugins/storage_aws.py +165 -150
  42. tracdap/rt/_plugins/storage_azure.py +17 -11
  43. tracdap/rt/_plugins/storage_gcp.py +35 -18
  44. tracdap/rt/_version.py +1 -1
  45. tracdap/rt/api/model_api.py +45 -0
  46. tracdap/rt/config/__init__.py +7 -9
  47. tracdap/rt/config/common.py +3 -14
  48. tracdap/rt/config/job.py +17 -3
  49. tracdap/rt/config/platform.py +9 -32
  50. tracdap/rt/config/result.py +8 -4
  51. tracdap/rt/config/runtime.py +5 -10
  52. tracdap/rt/config/tenant.py +28 -0
  53. tracdap/rt/launch/cli.py +0 -8
  54. tracdap/rt/launch/launch.py +1 -3
  55. tracdap/rt/metadata/__init__.py +35 -35
  56. tracdap/rt/metadata/data.py +19 -31
  57. tracdap/rt/metadata/job.py +3 -1
  58. tracdap/rt/metadata/storage.py +9 -0
  59. tracdap/rt/metadata/type.py +9 -5
  60. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +5 -3
  61. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +64 -63
  62. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
  63. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
  64. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
tracdap/rt/_impl/exec/engine.py

@@ -17,7 +17,6 @@ import copy as cp
 import dataclasses as dc
 import enum
 import io
-import pathlib
 import typing as tp
 
 import tracdap.rt.metadata as _meta
@@ -84,43 +83,41 @@ class _EngineContext:
 
 
 @dc.dataclass
-class _JobResultSpec:
+class _JobLog:
 
-    save_result: bool = False
-    result_dir: tp.Union[str, pathlib.Path] = None
-    result_format: str = None
+    log_init: "dc.InitVar[tp.Optional[_JobLog]]" = None
+
+    log_file_needed: bool = True
+
+    log_buffer: io.BytesIO = None
+    log_provider: _logging.LogProvider = None
+
+    def __post_init__(self, log_init):
+
+        if log_init is not None:
+            self.log_provider = log_init.log_provider
+            self.log_file_needed = False
+        elif self.log_file_needed:
+            self.log_buffer = io.BytesIO()
+            self.log_provider = _logging.job_log_provider(self.log_buffer)
+        else:
+            self.log_provider = _logging.LogProvider()
 
 
 @dc.dataclass
 class _JobState:
 
     job_id: _meta.TagHeader
-    log_init: dc.InitVar[tp.Optional[_logging.LogProvider]] = None
+    job_config: _cfg.JobConfig = None
+    parent_key: str = None
 
     actor_id: _actors.ActorId = None
     monitors: tp.List[_actors.ActorId] = dc.field(default_factory=list)
+    job_log: _JobLog = None
 
-    job_config: _cfg.JobConfig = None
     job_result: _cfg.JobResult = None
     job_error: Exception = None
 
-    parent_key: str = None
-    result_spec: _JobResultSpec = None
-
-    log_buffer: io.BytesIO = None
-    log_provider: _logging.LogProvider = None
-    log: _logging.Logger = None
-
-    def __post_init__(self, log_init):
-
-        if isinstance(self.log, _logging.LogProvider):
-            self.log_provider = log_init
-        else:
-            self.log_buffer = io.BytesIO()
-            self.log_provider = _logging.job_log_provider(self.log_buffer)
-
-        self.log = self.log_provider.logger_for_class(TracEngine)
-
 
 class TracEngine(_actors.Actor):
 
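The new _JobLog dataclass centralises per-job logging: a top-level job with logs enabled buffers output in memory so it can later be saved as a log file, a child job inherits its parent's provider, and otherwise a plain provider is used. A runnable sketch of that wiring, with stand-ins for the internal _logging module (LogProvider and job_log_provider are not shown in this diff):

import dataclasses as dc
import io
import typing as tp

class LogProvider:                              # stand-in for _logging.LogProvider
    def logger_for_object(self, obj): ...

def job_log_provider(buffer: io.BytesIO) -> LogProvider:
    return LogProvider()                        # the real version tees log output into the buffer

@dc.dataclass
class JobLog:
    log_init: "dc.InitVar[tp.Optional[JobLog]]" = None
    log_file_needed: bool = True
    log_buffer: io.BytesIO = None
    log_provider: LogProvider = None

    def __post_init__(self, log_init):
        if log_init is not None:                # child job: share the parent's provider
            self.log_provider = log_init.log_provider
            self.log_file_needed = False        # the parent owns the log file
        elif self.log_file_needed:              # top-level job with logs enabled
            self.log_buffer = io.BytesIO()
            self.log_provider = job_log_provider(self.log_buffer)
        else:                                   # logs disabled: plain provider, no buffer
            self.log_provider = LogProvider()

top_level = JobLog(log_file_needed=True)        # buffered, will produce a log file
child = JobLog(top_level)                       # inherits provider, no separate file
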
@@ -188,22 +185,25 @@ class TracEngine(_actors.Actor):
         return super().on_signal(signal)
 
     @_actors.Message
-    def submit_job(
-            self, job_config: _cfg.JobConfig,
-            job_result_dir: str,
-            job_result_format: str):
+    def submit_job(self, job_config: _cfg.JobConfig):
 
         job_key = _util.object_key(job_config.jobId)
+
+        self._log.info(f"Received a new job: [{job_key}]")
+
         job_state = _JobState(job_config.jobId)
+        job_state.job_config = job_config
 
-        job_state.log.info(f"Job submitted: [{job_key}]")
+        job_logs_enabled = _util.read_property(
+            job_config.properties,
+            _cfg_p.ConfigKeys.RESULT_LOGS_ENABLED,
+            False, bool)
 
-        result_needed = bool(job_result_dir)
-        result_spec = _JobResultSpec(result_needed, job_result_dir, job_result_format)
+        job_log = _JobLog(log_file_needed=job_logs_enabled)
 
         job_processor = JobProcessor(
-            self._sys_config, self._models, self._storage, job_state.log_provider,
-            job_key, job_config, graph_spec=None)
+            self._sys_config, self._models, self._storage,
+            job_key, job_config, graph_spec=None, job_log=job_log)
 
         job_actor_id = self.actors().spawn(job_processor)
 
@@ -214,8 +214,7 @@ class TracEngine(_actors.Actor):
 
         job_state.actor_id = job_actor_id
         job_state.monitors.append(job_monitor_id)
-        job_state.job_config = job_config
-        job_state.result_spec = result_spec
+        job_state.job_log = job_log
 
         self._jobs[job_key] = job_state
 
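The old submit_job took explicit job_result_dir / job_result_format arguments; in 0.9.0 those settings travel as job config properties, read via _util.read_property with a default and a conversion type. A hypothetical sketch of such a helper (the real one lives in tracdap/rt/_impl/core/util.py, which changed in this diff but is not shown here; the key string below is illustrative, the real names come from ConfigKeys):

import typing as tp

T = tp.TypeVar("T")

def read_property(
        properties: tp.Dict[str, str], key: str,
        default: tp.Optional[T] = None,
        convert: tp.Callable[[str], T] = str) -> tp.Optional[T]:

    raw = properties.get(key)
    if raw is None:
        return default
    if convert is bool:
        # "true"/"false" strings need explicit handling, bool("false") is True
        return raw.strip().lower() in ("true", "yes", "1")
    return convert(raw)

props = {"trac.result.logs.enabled": "true"}    # illustrative key string
logs_enabled = read_property(props, "trac.result.logs.enabled", False, bool)
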
@@ -231,17 +230,26 @@ class TracEngine(_actors.Actor):
 
         child_key = _util.object_key(child_id)
 
+        self._log.info(f"Received a child job: [{child_key}] for parent [{parent_key}]")
+
+        # Copy job config properties from parent job
+        child_config = _cfg.JobConfig()
+        child_config.properties.update(parent_state.job_config.properties)
+
+        child_job_log = _JobLog(parent_state.job_log)
+
         child_processor = JobProcessor(
-            self._sys_config, self._models, self._storage, parent_state.log_provider,
-            child_key, None, graph_spec=child_graph)
+            self._sys_config, self._models, self._storage,
+            child_key, None, graph_spec=child_graph, job_log=child_job_log)
 
         child_actor_id = self.actors().spawn(child_processor)
 
-        child_state = _JobState(child_id, parent_state.log_provider)
+        child_state = _JobState(child_id)
+        child_state.job_config = child_config
+        child_state.parent_key = parent_key
         child_state.actor_id = child_actor_id
         child_state.monitors.append(monitor_id)
-        child_state.parent_key = parent_key
-        child_state.result_spec = _JobResultSpec(False)  # Do not output separate results for child jobs
+        child_state.job_log = child_job_log
 
         self._jobs[child_key] = child_state
 
@@ -265,9 +273,9 @@ class TracEngine(_actors.Actor):
             self._log.warning(f"Ignoring [job_succeeded] message, job [{job_key}] has already completed")
             return
 
-        job_state = self._jobs[job_key]
-        job_state.log.info(f"Recording job as successful: {job_key}")
+        self._log.info(f"Marking job as successful: {job_key}")
 
+        job_state = self._jobs[job_key]
         job_state.job_result = job_result
 
         for monitor_id in job_state.monitors:
@@ -276,36 +284,30 @@ class TracEngine(_actors.Actor):
         self._finalize_job(job_key)
 
     @_actors.Message
-    def job_failed(self, job_key: str, error: Exception):
+    def job_failed(self, job_key: str, error: Exception, job_result: tp.Optional[_cfg.JobResult] = None):
 
         # Ignore duplicate messages from the job processor (can happen in unusual error cases)
         if job_key not in self._jobs:
             self._log.warning(f"Ignoring [job_failed] message, job [{job_key}] has already completed")
             return
 
-        job_state = self._jobs[job_key]
-        job_state.log.error(f"Recording job as failed: {job_key}")
-
-        job_state.job_error = error
-
-        # Create a failed result so there is something to report
-        result_id = job_state.job_config.resultMapping.get("trac_job_result")
+        self._log.error(f"Marking job as failed: {job_key}")
 
-        if result_id is not None:
-
-            job_state.job_result = _cfg.JobResult(
-                jobId=job_state.job_id,
-                statusCode=_meta.JobStatusCode.FAILED,
-                statusMessage=str(error))
+        job_state = self._jobs[job_key]
 
+        # Build a failed result if none is supplied by the job processor (should not normally happen)
+        # In this case, no job log will be included in the output
+        if job_result is None and job_state.job_config is not None:
+            job_id = job_state.job_id
+            result_id = job_state.job_config.resultId
             result_def = _meta.ResultDefinition()
             result_def.jobId = _util.selector_for(job_state.job_id)
             result_def.statusCode = _meta.JobStatusCode.FAILED
+            result_def.statusMessage = str(error)
+            job_result = _cfg.JobResult(job_id, result_id, result_def)
 
-            result_key = _util.object_key(result_id)
-            result_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.RESULT, result=result_def)
-
-            job_state.job_result.results[result_key] = result_obj
+        job_state.job_result = job_result
+        job_state.job_error = error
 
         for monitor_id in job_state.monitors:
             self.actors().send(monitor_id, "job_failed", error)
@@ -321,12 +323,13 @@ class TracEngine(_actors.Actor):
 
         job_state = self._jobs.get(job_key)
 
-        # Record output metadata if required (not needed for local runs or when using API server)
-        if job_state.parent_key is None and job_state.result_spec.save_result:
-
-            if "trac_job_log_file" in job_state.job_config.resultMapping:
-                self._save_job_log_file(job_key, job_state)
+        result_enabled = _util.read_property(
+            job_state.job_config.properties,
+            _cfg_p.ConfigKeys.RESULT_ENABLED,
+            False, bool)
 
+        # Record output metadata if required
+        if result_enabled and job_state.parent_key is None:
             self._save_job_result(job_key, job_state)
 
         # Stop any monitors that were created directly by the engine
@@ -341,53 +344,17 @@ class TracEngine(_actors.Actor):
             self.actors().stop(job_state.actor_id)
             job_state.actor_id = None
 
-    def _save_job_log_file(self, job_key: str, job_state: _JobState):
-
-        self._log.info(f"Saving job log file for [{job_key}]")
-
-        # Saving log files could go into a separate actor, perhaps a job monitor along with _save_job_result()
-
-        file_id = job_state.job_config.resultMapping["trac_job_log_file"]
-        storage_id = job_state.job_config.resultMapping["trac_job_log_file:STORAGE"]
-
-        file_type = _meta.FileType("TXT", "text/plain")
-        file_def, storage_def = _graph.GraphBuilder.build_output_file_and_storage(
-            "trac_job_log_file", file_type,
-            self._sys_config, job_state.job_config)
-
-        storage_item = storage_def.dataItems[file_def.dataItem].incarnations[0].copies[0]
-        storage = self._storage.get_file_storage(storage_item.storageKey)
-
-        with storage.write_byte_stream(storage_item.storagePath) as stream:
-            stream.write(job_state.log_buffer.getbuffer())
-            file_def.size = stream.tell()
-
-        result_id = job_state.job_config.resultMapping["trac_job_result"]
-        result_def = job_state.job_result.results[_util.object_key(result_id)].result
-        result_def.logFileId = _util.selector_for(file_id)
-
-        file_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.FILE, file=file_def)
-        storage_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
-
-        job_state.job_result.results[_util.object_key(file_id)] = file_obj
-        job_state.job_result.results[_util.object_key(storage_id)] = storage_obj
-
     def _save_job_result(self, job_key: str, job_state: _JobState):
 
         self._log.info(f"Saving job result for [{job_key}]")
 
-        # It might be better abstract reporting of results, job status etc., perhaps with a job monitor
-
-        if job_state.result_spec.save_result:
+        storage_key = _util.read_property(job_state.job_config.properties, _cfg_p.ConfigKeys.RESULT_STORAGE_LOCATION)
+        storage_path = _util.read_property(job_state.job_config.properties, _cfg_p.ConfigKeys.RESULT_STORAGE_PATH)
+        result_format = _util.read_property(job_state.job_config.properties, _cfg_p.ConfigKeys.RESULT_FORMAT, "JSON")
+        result_content = _cfg_p.ConfigQuoter.quote(job_state.job_result, result_format)
 
-            result_format = job_state.result_spec.result_format
-            result_dir = job_state.result_spec.result_dir
-            result_file = f"job_result_{job_key}.{result_format}"
-            result_path = pathlib.Path(result_dir).joinpath(result_file)
-
-            with open(result_path, "xt") as result_stream:
-                result_content = _cfg_p.ConfigQuoter.quote(job_state.job_result, result_format)
-                result_stream.write(result_content)
+        storage = self._storage.get_file_storage(storage_key)
+        storage.write_bytes(storage_path, result_content.encode('utf-8'))
 
     def _get_job_info(self, job_key: str, details: bool = False) -> tp.Optional[_cfg.JobResult]:
 
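With _JobResultSpec gone, _save_job_result is driven entirely by job properties and writes through the storage abstraction rather than calling open() on a local path. A minimal sketch of that flow with stubbed internals (ConfigQuoter and the storage manager are not part of this diff; fields and names are simplified stand-ins):

import dataclasses as dc
import json

@dc.dataclass
class JobResult:                      # stand-in, fields simplified for the sketch
    jobId: str = ""

def quote(result: JobResult, fmt: str) -> str:
    # stand-in for _cfg_p.ConfigQuoter.quote; the diff defaults RESULT_FORMAT to "JSON"
    assert fmt == "JSON"
    return json.dumps(dc.asdict(result), indent=2)

class FileStorage:                    # stand-in for the file storage plugin API
    def write_bytes(self, path: str, data: bytes):
        print(f"writing {len(data)} bytes to {path}")

storage = FileStorage()
content = quote(JobResult(jobId="job-123"), "JSON")
storage.write_bytes("results/job-123.json", content.encode("utf-8"))
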
@@ -398,24 +365,28 @@ class TracEngine(_actors.Actor):
 
         job_result = _cfg.JobResult()
         job_result.jobId = job_state.job_id
+        job_result.resultId = job_state.job_config.resultId
+        job_result.result = _meta.ResultDefinition()
+        job_result.result.jobId = _util.selector_for(job_state.job_id)
 
         if job_state.actor_id is not None:
-            job_result.statusCode = _meta.JobStatusCode.RUNNING
+            job_result.result.statusCode = _meta.JobStatusCode.RUNNING
 
         elif job_state.job_result is not None:
-            job_result.statusCode = job_state.job_result.statusCode
-            job_result.statusMessage = job_state.job_result.statusMessage
+            job_result.result.statusCode = job_state.job_result.result.statusCode
+            job_result.result.statusMessage = job_state.job_result.result.statusMessage
             if details:
-                job_result.results = job_state.job_result.results or dict()
+                job_result.objectIds = job_state.job_result.objectIds or list()
+                job_result.objects = job_state.job_result.objects or dict()
 
         elif job_state.job_error is not None:
-            job_result.statusCode = _meta.JobStatusCode.FAILED
-            job_result.statusMessage = str(job_state.job_error.args[0])
+            job_result.result.statusCode = _meta.JobStatusCode.FAILED
+            job_result.result.statusMessage = str(job_state.job_error.args[0])
 
         else:
             # Alternatively return UNKNOWN status or throw an error here
-            job_result.statusCode = _meta.JobStatusCode.FAILED
-            job_result.statusMessage = "No details available"
+            job_result.result.statusCode = _meta.JobStatusCode.FAILED
+            job_result.result.statusMessage = "No details available"
 
         return job_result
 
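_get_job_info reflects the reshaped JobResult: status fields moved onto an embedded ResultDefinition, and the old results mapping became an objectIds list plus an objects dict. A simplified stand-in for the new shape as implied by this diff, with ID and object types reduced to plain Python types:

import dataclasses as dc
import typing as tp

@dc.dataclass
class ResultDefinition:               # status now lives here, not on JobResult
    jobId: str = ""
    statusCode: str = "UNKNOWN"
    statusMessage: str = ""
    logFileId: str = ""

@dc.dataclass
class JobResult:
    jobId: str = ""
    resultId: str = ""
    result: ResultDefinition = None
    objectIds: tp.List[str] = dc.field(default_factory=list)        # replaces the
    objects: tp.Dict[str, object] = dc.field(default_factory=dict)  # old results dict
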
@@ -458,8 +429,9 @@ class JobProcessor(_actors.Actor):
 
     def __init__(
             self, sys_config: _cfg.RuntimeConfig,
-            models: _models.ModelLoader, storage: _storage.StorageManager, log_provider: _logging.LogProvider,
-            job_key: str, job_config: tp.Optional[_cfg.JobConfig], graph_spec: tp.Optional[_graph.Graph]):
+            models: _models.ModelLoader, storage: _storage.StorageManager,
+            job_key: str, job_config: tp.Optional[_cfg.JobConfig], graph_spec: tp.Optional[_graph.Graph],
+            job_log: tp.Optional[_JobLog] = None):
 
         super().__init__()
 
@@ -473,9 +445,15 @@ class JobProcessor(_actors.Actor):
         self._sys_config = sys_config
         self._models = models
         self._storage = storage
-        self._log_provider = log_provider
-        self._resolver = _func.FunctionResolver(models, storage, log_provider)
-        self._log = log_provider.logger_for_object(self)
+
+        self._job_log = job_log if job_log is not None else _JobLog()
+        self._log_provider = self._job_log.log_provider
+        self._log = self._job_log.log_provider.logger_for_object(self)
+
+        self._log.info(f"New job created for [{self.job_key}]")
+
+        self._resolver = _func.FunctionResolver(models, storage, self._log_provider)
+        self._preallocated_ids: tp.Dict[_meta.ObjectType, tp.List[_meta.TagHeader]] = dict()
 
     def on_start(self):
 
@@ -513,7 +491,10 @@ class JobProcessor(_actors.Actor):
         return super().on_signal(signal)
 
     @_actors.Message
-    def build_graph_succeeded(self, graph_spec: _graph.Graph):
+    def build_graph_succeeded(self, graph_spec: _graph.Graph, unallocated_ids = None):
+
+        # Save any unallocated IDs to use later (needed for saving the log file)
+        self._preallocated_ids = unallocated_ids or dict()
 
         # Build a new engine context graph from the graph spec
         engine_id = self.actors().parent
@@ -524,6 +505,7 @@ class JobProcessor(_actors.Actor):
         graph.pending_nodes.update(graph.nodes.keys())
 
         self.actors().spawn(FunctionResolver(self._resolver, self._log_provider, graph))
+
         if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
             self.actors().stop(self.actors().sender)
 
@@ -531,20 +513,103 @@ class JobProcessor(_actors.Actor):
     def resolve_functions_succeeded(self, graph: _EngineContext):
 
         self.actors().spawn(GraphProcessor(graph, self._resolver, self._log_provider))
+
         if self.actors().sender != self.actors().id and self.actors().sender != self.actors().parent:
             self.actors().stop(self.actors().sender)
 
     @_actors.Message
     def job_succeeded(self, job_result: _cfg.JobResult):
-        self._log.info(f"Job succeeded {self.job_key}")
+
+        # This will be the last message in the job log file
+        self._log.info(f"Job succeeded [{self.job_key}]")
+
+        if self._job_log.log_file_needed:
+            self._save_job_log_file(job_result)
+
         self.actors().stop(self.actors().sender)
         self.actors().send_parent("job_succeeded", self.job_key, job_result)
 
     @_actors.Message
     def job_failed(self, error: Exception):
-        self._log.error(f"Job failed {self.job_key}")
+
+        # This will be the last message in the job log file
+        self._log.error(f"Job failed [{self.job_key}]")
+
         self.actors().stop(self.actors().sender)
-        self.actors().send_parent("job_failed", self.job_key, error)
+
+        # For top level jobs, build a failed job result and save the log file
+        if self.job_config is not None:
+
+            job_id = self.job_config.jobId
+            result_id = self.job_config.resultId
+            result_def = _meta.ResultDefinition()
+            result_def.jobId = _util.selector_for(job_id)
+            result_def.statusCode = _meta.JobStatusCode.FAILED
+            result_def.statusMessage = str(error)
+            job_result = _cfg.JobResult(job_id, result_id, result_def)
+
+            if self._job_log.log_file_needed:
+                self._save_job_log_file(job_result)
+
+            self.actors().send_parent("job_failed", self.job_key, error, job_result)
+
+        # For child jobs, just send the error response
+        # Result and log file will be handled in the top level job
+        else:
+            self.actors().send_parent("job_failed", self.job_key, error)
+
+    def _save_job_log_file(self, job_result: _cfg.JobResult):
+
+        # Do not fail the job if log content is not available
+        if self._job_log.log_buffer is None:
+            self._log.warning(f"Job log not available for [{self.job_key}]")
+            return
+
+        # Saving log files could go into a separate actor
+
+        file_id = self._allocate_id(_meta.ObjectType.FILE)
+        storage_id = self._allocate_id(_meta.ObjectType.STORAGE)
+
+        self._log.info(f"Saving job log [{_util.object_key(file_id)}]")
+
+        file_name = "trac_job_log_file"
+        file_type = _meta.FileType("TXT", "text/plain")
+
+        file_spec = _data.build_file_spec(
+            file_id, storage_id,
+            file_name, file_type,
+            self._sys_config)
+
+        file_def = file_spec.definition
+        storage_def = file_spec.storage
+
+        storage_item = storage_def.dataItems[file_def.dataItem].incarnations[0].copies[0]
+        storage = self._storage.get_file_storage(storage_item.storageKey)
+
+        with storage.write_byte_stream(storage_item.storagePath) as stream:
+            stream.write(self._job_log.log_buffer.getbuffer())
+            file_def.size = stream.tell()
+
+        result_def = job_result.result
+        result_def.logFileId = _util.selector_for(file_id)
+
+        file_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.FILE, file=file_def)
+        storage_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
+
+        job_result.objectIds.append(file_id)
+        job_result.objectIds.append(storage_id)
+        job_result.objects[_util.object_key(file_id)] = file_obj
+        job_result.objects[_util.object_key(storage_id)] = storage_obj
+
+    def _allocate_id(self, object_type: _meta.ObjectType):
+
+        preallocated_ids = self._preallocated_ids.get(object_type)
+
+        if preallocated_ids:
+            # Preallocated IDs have objectVersion = 0, use a new version to get objectVersion = 1
+            return _util.new_object_version(preallocated_ids.pop())
+        else:
+            return _util.new_object_id(object_type)
 
 
 class GraphBuilder(_actors.Actor):
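The _allocate_id helper prefers IDs preallocated by the graph builder and only mints fresh ones as a fallback, bumping preallocated IDs from objectVersion 0 to 1 so they match freshly created ones. A self-contained sketch of that pattern, assuming a simplified TagHeader stand-in (the real new_object_id / new_object_version helpers live in tracdap/rt/_impl/core/util.py):

import dataclasses as dc
import typing as tp
import uuid

@dc.dataclass
class TagHeader:                       # stand-in for _meta.TagHeader
    objectType: str
    objectId: str
    objectVersion: int = 0

def new_object_id(object_type: str) -> TagHeader:
    return TagHeader(object_type, str(uuid.uuid4()), objectVersion=1)

def new_object_version(header: TagHeader) -> TagHeader:
    return dc.replace(header, objectVersion=header.objectVersion + 1)

def allocate_id(preallocated: tp.Dict[str, tp.List[TagHeader]], object_type: str) -> TagHeader:
    ids = preallocated.get(object_type)
    if ids:
        # Preallocated IDs arrive with objectVersion = 0; bump to version 1 so
        # IDs reserved up front stay consistent with freshly minted ones
        return new_object_version(ids.pop())
    return new_object_id(object_type)
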
@@ -570,8 +635,9 @@ class GraphBuilder(_actors.Actor):
 
         graph_builder = _graph.GraphBuilder(self.sys_config, job_config)
         graph_spec = graph_builder.build_job(job_config.job)
+        unallocated_ids = graph_builder.unallocated_ids()
 
-        self.actors().reply("build_graph_succeeded", graph_spec)
+        self.actors().reply("build_graph_succeeded", graph_spec, unallocated_ids)
 
 
 class FunctionResolver(_actors.Actor):
@@ -704,23 +770,21 @@ class GraphProcessor(_actors.Actor):
         self.check_job_status(do_submit=False)
 
     @_actors.Message
-    def update_graph(
-            self, requestor_id: NodeId,
-            new_nodes: tp.Dict[NodeId, _graph.Node],
-            new_deps: tp.Dict[NodeId, tp.List[_graph.Dependency]]):
+    def update_graph(self, requestor_id: NodeId, update: _graph.GraphUpdate):
 
         new_graph = cp.copy(self.graph)
         new_graph.nodes = cp.copy(new_graph.nodes)
 
         # Attempt to insert a duplicate node is always an error
-        node_collision = list(filter(lambda nid: nid in self.graph.nodes, new_nodes))
+        node_collision = list(filter(lambda nid: nid in self.graph.nodes, update.nodes))
 
         # Only allow adding deps to pending nodes for now (adding deps to active nodes will require more work)
-        dep_collision = list(filter(lambda nid: nid not in self.graph.pending_nodes, new_deps))
+        dep_collision = list(filter(lambda nid: nid not in self.graph.pending_nodes, update.dependencies))
 
+        # Only allow adding deps to new nodes (deps to existing nodes should not be part of an update)
         dep_invalid = list(filter(
-            lambda dds: any(filter(lambda dd: dd.node_id not in new_nodes, dds)),
-            new_deps.values()))
+            lambda ds: any(filter(lambda d: d.node_id not in update.nodes, ds)),
+            update.dependencies.values()))
 
         if any(node_collision) or any(dep_collision) or any(dep_invalid):
 
@@ -736,18 +800,20 @@ class GraphProcessor(_actors.Actor):
             requestor.error = _ex.ETracInternal("Node collision during graph update")
             new_graph.nodes[requestor_id] = requestor
 
+            self.graph = new_graph
+
             return
 
         new_graph.pending_nodes = cp.copy(new_graph.pending_nodes)
 
-        for node_id, node in new_nodes.items():
+        for node_id, node in update.nodes.items():
             self._graph_logger.log_node_add(node)
             node_func = self._resolver.resolve_node(node)
             new_node = _EngineNode(node, node_func)
             new_graph.nodes[node_id] = new_node
             new_graph.pending_nodes.add(node_id)
 
-        for node_id, deps in new_deps.items():
+        for node_id, deps in update.dependencies.items():
             engine_node = cp.copy(new_graph.nodes[node_id])
             engine_node.dependencies = cp.copy(engine_node.dependencies)
             for dep in deps:
@@ -1302,8 +1368,5 @@ class NodeCallbackImpl(_func.NodeCallback):
         self.__actor_ctx = actor_ctx
         self.__node_id = node_id
 
-    def send_graph_updates(
-            self, new_nodes: tp.Dict[NodeId, _graph.Node],
-            new_deps: tp.Dict[NodeId, tp.List[_graph.Dependency]]):
-
-        self.__actor_ctx.send_parent("update_graph", self.__node_id, new_nodes, new_deps)
+    def send_graph_update(self, update: _graph.GraphUpdate):
+        self.__actor_ctx.send_parent("update_graph", self.__node_id, update)
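The final hunk collapses send_graph_updates(new_nodes, new_deps) into send_graph_update(update), passing a single message object. A sketch of the GraphUpdate shape inferred from how update.nodes and update.dependencies are used in GraphProcessor.update_graph above; the real class lives in tracdap/rt/_impl/exec/graph.py (changed in this diff but not shown), and the graph types here are stand-ins:

import dataclasses as dc
import typing as tp

NodeId = str                # stand-ins for the real graph types
Node = object
Dependency = object

@dc.dataclass
class GraphUpdate:
    nodes: tp.Dict[NodeId, Node] = dc.field(default_factory=dict)
    dependencies: tp.Dict[NodeId, tp.List[Dependency]] = dc.field(default_factory=dict)

# Callers now pass one object instead of two loose dicts:
update = GraphUpdate(nodes={"node_a": object()}, dependencies={"node_a": []})
# node_callback.send_graph_update(update)   # forwarded to the parent actor as "update_graph"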