tracdap-runtime 0.6.5__py3-none-any.whl → 0.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,75 +22,111 @@ from .graph import *
22
22
 
23
23
  class GraphBuilder:
24
24
 
25
- __JOB_BUILD_FUNC = tp.Callable[
26
- [config.JobConfig, JobResultSpec, NodeNamespace, NodeId],
27
- GraphSection]
25
+ __JOB_DETAILS = tp.TypeVar(
26
+ "__JOB_DETAILS",
27
+ meta.RunModelJob,
28
+ meta.RunFlowJob,
29
+ meta.ImportModelJob,
30
+ meta.ImportDataJob,
31
+ meta.ExportDataJob)
28
32
 
29
- @classmethod
30
- def build_job(
31
- cls, job_config: config.JobConfig,
32
- result_spec: JobResultSpec) -> Graph:
33
+ __JOB_BUILD_FUNC = tp.Callable[[meta.JobDefinition, NodeId], GraphSection]
33
34
 
34
- if job_config.job.jobType == meta.JobType.IMPORT_MODEL:
35
- return cls.build_standard_job(job_config, result_spec, cls.build_import_model_job)
35
+ def __init__(self, job_config: config.JobConfig, result_spec: JobResultSpec):
36
36
 
37
- if job_config.job.jobType == meta.JobType.RUN_MODEL:
38
- return cls.build_standard_job(job_config, result_spec, cls.build_run_model_job)
37
+ self._job_config = job_config
38
+ self._result_spec = result_spec
39
39
 
40
- if job_config.job.jobType == meta.JobType.RUN_FLOW:
41
- return cls.build_standard_job(job_config, result_spec, cls.build_run_flow_job)
40
+ self._job_key = _util.object_key(job_config.jobId)
41
+ self._job_namespace = NodeNamespace(self._job_key)
42
42
 
43
- if job_config.job.jobType in [meta.JobType.IMPORT_DATA, meta.JobType.EXPORT_DATA]:
44
- return cls.build_standard_job(job_config, result_spec, cls.build_import_export_data_job)
43
+ self._errors = []
45
44
 
46
- raise _ex.EConfigParse(f"Job type [{job_config.job.jobType}] is not supported yet")
45
+ def _child_builder(self, job_id: meta.TagHeader) -> "GraphBuilder":
47
46
 
48
- @classmethod
49
- def build_standard_job(
50
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
51
- build_func: __JOB_BUILD_FUNC):
47
+ builder = GraphBuilder(self._job_config, JobResultSpec(save_result=False))
48
+ builder._job_key = _util.object_key(job_id)
49
+ builder._job_namespace = NodeNamespace(builder._job_key)
52
50
 
53
- # Set up the job context
51
+ return builder
52
+
53
+ def build_job(self, job_def: meta.JobDefinition,) -> Graph:
54
+
55
+ try:
56
+
57
+ if job_def.jobType == meta.JobType.IMPORT_MODEL:
58
+ return self.build_standard_job(job_def, self.build_import_model_job)
59
+
60
+ if job_def.jobType == meta.JobType.RUN_MODEL:
61
+ return self.build_standard_job(job_def, self.build_run_model_job)
62
+
63
+ if job_def.jobType == meta.JobType.RUN_FLOW:
64
+ return self.build_standard_job(job_def, self.build_run_flow_job)
65
+
66
+ if job_def.jobType in [meta.JobType.IMPORT_DATA, meta.JobType.EXPORT_DATA]:
67
+ return self.build_standard_job(job_def, self.build_import_export_data_job)
68
+
69
+ if job_def.jobType == meta.JobType.JOB_GROUP:
70
+ return self.build_standard_job(job_def, self.build_job_group)
71
+
72
+ self._error(_ex.EJobValidation(f"Job type [{job_def.jobType.name}] is not supported yet"))
54
73
 
55
- job_key = _util.object_key(job_config.jobId)
56
- job_namespace = NodeNamespace(job_key)
74
+ except Exception as e:
57
75
 
58
- push_id = NodeId("trac_job_push", job_namespace, Bundle[tp.Any])
59
- push_node = ContextPushNode(push_id, job_namespace)
76
+ # If there are recorded errors, assume unhandled exceptions are a result of those
77
+ # Only report the recorded errors, to reduce noise
78
+ if any(self._errors):
79
+ pass
80
+
81
+ # If no errors are recorded, an exception here would be a bug
82
+ raise _ex.ETracInternal(f"Unexpected error preparing the job execution graph") from e
83
+
84
+ finally:
85
+
86
+ if any(self._errors):
87
+
88
+ if len(self._errors) == 1:
89
+ raise self._errors[0]
90
+ else:
91
+ err_text = "\n".join(map(str, self._errors))
92
+ raise _ex.EJobValidation("Invalid job configuration\n" + err_text)
93
+
94
+ def build_standard_job(self, job_def: meta.JobDefinition, build_func: __JOB_BUILD_FUNC):
95
+
96
+ # Set up the job context
97
+
98
+ push_id = NodeId("trac_job_push", self._job_namespace, Bundle[tp.Any])
99
+ push_node = ContextPushNode(push_id, self._job_namespace)
60
100
  push_section = GraphSection({push_id: push_node}, must_run=[push_id])
61
101
 
62
102
  # Build the execution graphs for the main job and results recording
63
103
 
64
- main_section = build_func(job_config, result_spec, job_namespace, push_id)
65
- main_result_id = NodeId.of("trac_build_result", job_namespace, config.JobResult)
104
+ main_section = build_func(job_def, push_id)
105
+ main_result_id = NodeId.of("trac_job_result", self._job_namespace, config.JobResult)
66
106
 
67
107
  # Clean up the job context
68
108
 
69
- global_result_id = NodeId.of(job_key, NodeNamespace.root(), config.JobResult)
109
+ global_result_id = NodeId.of(self._job_key, NodeNamespace.root(), config.JobResult)
70
110
 
71
- pop_id = NodeId("trac_job_pop", job_namespace, Bundle[tp.Any])
111
+ pop_id = NodeId("trac_job_pop", self._job_namespace, Bundle[tp.Any])
72
112
  pop_mapping = {main_result_id: global_result_id}
73
113
 
74
114
  pop_node = ContextPopNode(
75
- pop_id, job_namespace, pop_mapping,
115
+ pop_id, self._job_namespace, pop_mapping,
76
116
  explicit_deps=main_section.must_run,
77
117
  bundle=NodeNamespace.root())
78
118
 
79
- global_result_node = BundleItemNode(global_result_id, pop_id, job_key)
119
+ global_result_node = BundleItemNode(global_result_id, pop_id, self._job_key)
80
120
 
81
121
  pop_section = GraphSection({
82
122
  pop_id: pop_node,
83
123
  global_result_id: global_result_node})
84
124
 
85
- job = cls._join_sections(push_section, main_section, pop_section)
125
+ job = self._join_sections(push_section, main_section, pop_section)
86
126
 
87
127
  return Graph(job.nodes, global_result_id)
88
128
 
89
- @classmethod
90
- def build_import_model_job(
91
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
92
- job_namespace: NodeNamespace, job_push_id: NodeId) \
93
- -> GraphSection:
129
+ def build_import_model_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
94
130
 
95
131
  # Main section: run the model import
96
132
 
@@ -98,82 +134,142 @@ class GraphBuilder:
98
134
  new_model_id = _util.new_object_id(meta.ObjectType.MODEL)
99
135
  new_model_key = _util.object_key(new_model_id)
100
136
 
101
- model_scope = _util.object_key(job_config.jobId)
102
- import_details = job_config.job.importModel
137
+ model_scope = self._job_key
138
+ import_details = job_def.importModel
103
139
 
104
- import_id = NodeId.of("trac_import_model", job_namespace, meta.ObjectDefinition)
140
+ import_id = NodeId.of("trac_import_model", self._job_namespace, meta.ObjectDefinition)
105
141
  import_node = ImportModelNode(import_id, model_scope, import_details, explicit_deps=[job_push_id])
106
142
 
107
143
  main_section = GraphSection(nodes={import_id: import_node})
108
144
 
109
145
  # Build job-level metadata outputs
110
146
 
111
- result_section = cls.build_job_results(
112
- job_config, job_namespace, result_spec,
147
+ result_section = self.build_job_results(
113
148
  objects={new_model_key: import_id},
114
149
  explicit_deps=[job_push_id, *main_section.must_run])
115
150
 
116
- return cls._join_sections(main_section, result_section)
151
+ return self._join_sections(main_section, result_section)
117
152
 
118
- @classmethod
119
- def build_import_export_data_job(
120
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
121
- job_namespace: NodeNamespace, job_push_id: NodeId) \
122
- -> GraphSection:
153
+ def build_import_export_data_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
123
154
 
124
155
  # TODO: These are processed as regular calculation jobs for now
125
156
  # That might be ok, but is worth reviewing
126
157
 
127
- if job_config.job.jobType == meta.JobType.IMPORT_DATA:
128
- job_def = job_config.job.importData
158
+ if job_def.jobType == meta.JobType.IMPORT_DATA:
159
+ job_details = job_def.importData
129
160
  else:
130
- job_def = job_config.job.exportData
161
+ job_details = job_def.exportData
131
162
 
132
- target_selector = job_def.model
133
- target_obj = _util.get_job_resource(target_selector, job_config)
163
+ target_selector = job_details.model
164
+ target_obj = _util.get_job_resource(target_selector, self._job_config)
134
165
  target_def = target_obj.model
135
166
 
136
- return cls.build_calculation_job(
137
- job_config, result_spec, job_namespace, job_push_id,
138
- target_selector, target_def, job_def)
167
+ return self.build_calculation_job(
168
+ job_def, job_push_id,
169
+ target_selector, target_def,
170
+ job_details)
139
171
 
140
- @classmethod
141
- def build_run_model_job(
142
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
143
- job_namespace: NodeNamespace, job_push_id: NodeId) \
144
- -> GraphSection:
172
+ def build_run_model_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
173
+
174
+ job_details = job_def.runModel
145
175
 
146
- target_selector = job_config.job.runModel.model
147
- target_obj = _util.get_job_resource(target_selector, job_config)
176
+ target_selector = job_details.model
177
+ target_obj = _util.get_job_resource(target_selector, self._job_config)
148
178
  target_def = target_obj.model
149
- job_def = job_config.job.runModel
150
179
 
151
- return cls.build_calculation_job(
152
- job_config, result_spec, job_namespace, job_push_id,
153
- target_selector, target_def, job_def)
180
+ return self.build_calculation_job(
181
+ job_def, job_push_id,
182
+ target_selector, target_def,
183
+ job_details)
154
184
 
155
- @classmethod
156
- def build_run_flow_job(
157
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
158
- job_namespace: NodeNamespace, job_push_id: NodeId) \
159
- -> GraphSection:
185
+ def build_run_flow_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
186
+
187
+ job_details = job_def.runFlow
160
188
 
161
- target_selector = job_config.job.runFlow.flow
162
- target_obj = _util.get_job_resource(target_selector, job_config)
189
+ target_selector = job_details.flow
190
+ target_obj = _util.get_job_resource(target_selector, self._job_config)
163
191
  target_def = target_obj.flow
164
- job_def = job_config.job.runFlow
165
192
 
166
- return cls.build_calculation_job(
167
- job_config, result_spec, job_namespace, job_push_id,
168
- target_selector, target_def, job_def)
193
+ return self.build_calculation_job(
194
+ job_def, job_push_id,
195
+ target_selector, target_def,
196
+ job_details)
197
+
198
+ def build_job_group(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
199
+
200
+ job_group = job_def.jobGroup
201
+
202
+ if job_group.jobGroupType == meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
203
+ return self.build_sequential_job_group(job_group, job_push_id)
204
+
205
+ if job_group.jobGroupType == meta.JobGroupType.PARALLEL_JOB_GROUP:
206
+ return self.build_parallel_job_group(job_group, job_push_id)
207
+
208
+ else:
209
+ self._error(_ex.EJobValidation(f"Job group type [{job_group.jobGroupType.name}] is not supported yet"))
210
+ return GraphSection(dict(), inputs={job_push_id})
211
+
212
+ def build_sequential_job_group(self, job_group: meta.JobGroup, job_push_id: NodeId) -> GraphSection:
213
+
214
+ nodes = dict()
215
+ prior_id = job_push_id
216
+
217
+ for child_def in job_group.sequential.jobs:
218
+
219
+ child_node = self.build_child_job(child_def, explicit_deps=[prior_id])
220
+ nodes[child_node.id] = child_node
221
+
222
+ prior_id = child_node.id
223
+
224
+ # No real results from job groups yet (they cannot be executed from the platform)
225
+ job_result = cfg.JobResult()
226
+ result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
227
+ result_node = StaticValueNode(result_id, job_result, explicit_deps=[prior_id])
228
+ nodes[result_id] = result_node
229
+
230
+ return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})
231
+
232
+ def build_parallel_job_group(self, job_group: meta.JobGroup, job_push_id: NodeId) -> GraphSection:
233
+
234
+ nodes = dict()
235
+ parallel_ids = [job_push_id]
236
+
237
+ for child_def in job_group.parallel.jobs:
238
+
239
+ child_node = self.build_child_job(child_def, explicit_deps=[job_push_id])
240
+ nodes[child_node.id] = child_node
241
+
242
+ parallel_ids.append(child_node.id)
243
+
244
+ # No real results from job groups yet (they cannot be executed from the platform)
245
+ job_result = cfg.JobResult()
246
+ result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
247
+ result_node = StaticValueNode(result_id, job_result, explicit_deps=parallel_ids)
248
+ nodes[result_id] = result_node
249
+
250
+ return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})
251
+
252
+ def build_child_job(self, child_job_def: meta.JobDefinition, explicit_deps) -> Node[config.JobResult]:
253
+
254
+ child_job_id = _util.new_object_id(meta.ObjectType.JOB)
255
+
256
+ child_builder = self._child_builder(child_job_id)
257
+ child_graph = child_builder.build_job(child_job_def)
258
+
259
+ child_node_name = _util.object_key(child_job_id)
260
+ child_node_id = NodeId.of(child_node_name, self._job_namespace, cfg.JobResult)
261
+
262
+ child_node = ChildJobNode(
263
+ child_node_id, child_job_id, child_job_def,
264
+ child_graph, explicit_deps)
265
+
266
+ return child_node
169
267
 
170
- @classmethod
171
268
  def build_calculation_job(
172
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
173
- job_namespace: NodeNamespace, job_push_id: NodeId,
269
+ self, job_def: meta.JobDefinition, job_push_id: NodeId,
174
270
  target_selector: meta.TagSelector,
175
271
  target_def: tp.Union[meta.ModelDefinition, meta.FlowDefinition],
176
- job_def: tp.Union[meta.RunModelJob, meta.RunFlowJob]) \
272
+ job_details: __JOB_DETAILS) \
177
273
  -> GraphSection:
178
274
 
179
275
  # The main execution graph can run directly in the job context, no need to do a context push
@@ -185,29 +281,30 @@ class GraphBuilder:
185
281
  required_inputs = target_def.inputs
186
282
  required_outputs = target_def.outputs
187
283
 
188
- provided_params = job_def.parameters
189
- provided_inputs = job_def.inputs
190
- provided_outputs = job_def.outputs
284
+ provided_params = job_details.parameters
285
+ provided_inputs = job_details.inputs
286
+ provided_outputs = job_details.outputs
191
287
 
192
- params_section = cls.build_job_parameters(
193
- job_namespace, required_params, provided_params,
288
+ params_section = self.build_job_parameters(
289
+ required_params, provided_params,
194
290
  explicit_deps=[job_push_id])
195
291
 
196
- input_section = cls.build_job_inputs(
197
- job_config, job_namespace, required_inputs, provided_inputs,
292
+ input_section = self.build_job_inputs(
293
+ required_inputs, provided_inputs,
198
294
  explicit_deps=[job_push_id])
199
295
 
200
- exec_obj = _util.get_job_resource(target_selector, job_config)
296
+ exec_namespace = self._job_namespace
297
+ exec_obj = _util.get_job_resource(target_selector, self._job_config)
201
298
 
202
- exec_section = cls.build_model_or_flow(
203
- job_config, job_namespace, exec_obj,
299
+ exec_section = self.build_model_or_flow(
300
+ exec_namespace, job_def, exec_obj,
204
301
  explicit_deps=[job_push_id])
205
302
 
206
- output_section = cls.build_job_outputs(
207
- job_config, job_namespace, required_outputs, provided_outputs,
303
+ output_section = self.build_job_outputs(
304
+ required_outputs, provided_outputs,
208
305
  explicit_deps=[job_push_id])
209
306
 
210
- main_section = cls._join_sections(params_section, input_section, exec_section, output_section)
307
+ main_section = self._join_sections(params_section, input_section, exec_section, output_section)
211
308
 
212
309
  # Build job-level metadata outputs
213
310
 
@@ -215,16 +312,14 @@ class GraphBuilder:
215
312
  nid for nid, n in main_section.nodes.items()
216
313
  if isinstance(n, DataResultNode))
217
314
 
218
- result_section = cls.build_job_results(
219
- job_config, job_namespace,
220
- result_spec, bundles=data_result_ids,
315
+ result_section = self.build_job_results(
316
+ bundles=data_result_ids,
221
317
  explicit_deps=[job_push_id, *main_section.must_run])
222
318
 
223
- return cls._join_sections(main_section, result_section)
319
+ return self._join_sections(main_section, result_section)
224
320
 
225
- @classmethod
226
321
  def build_job_parameters(
227
- cls, job_namespace: NodeNamespace,
322
+ self,
228
323
  required_params: tp.Dict[str, meta.ModelParameter],
229
324
  supplied_params: tp.Dict[str, meta.Value],
230
325
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
@@ -240,18 +335,18 @@ class GraphBuilder:
240
335
  if param_schema.defaultValue is not None:
241
336
  param_def = param_schema.defaultValue
242
337
  else:
243
- raise _ex.EJobValidation(f"Missing required parameter: [{param_name}]")
338
+ self._error(_ex.EJobValidation(f"Missing required parameter: [{param_name}]"))
339
+ continue
244
340
 
245
- param_id = NodeId(param_name, job_namespace, meta.Value)
341
+ param_id = NodeId(param_name, self._job_namespace, meta.Value)
246
342
  param_node = StaticValueNode(param_id, param_def, explicit_deps=explicit_deps)
247
343
 
248
344
  nodes[param_id] = param_node
249
345
 
250
346
  return GraphSection(nodes, outputs=set(nodes.keys()), must_run=list(nodes.keys()))
251
347
 
252
- @classmethod
253
348
  def build_job_inputs(
254
- cls, job_config: config.JobConfig, job_namespace: NodeNamespace,
349
+ self,
255
350
  required_inputs: tp.Dict[str, meta.ModelInputSchema],
256
351
  supplied_inputs: tp.Dict[str, meta.TagSelector],
257
352
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
@@ -267,20 +362,21 @@ class GraphBuilder:
267
362
 
268
363
  if data_selector is None:
269
364
  if input_schema.optional:
270
- data_view_id = NodeId.of(input_name, job_namespace, _data.DataView)
365
+ data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
271
366
  nodes[data_view_id] = StaticValueNode(data_view_id, _data.DataView.create_empty())
272
367
  outputs.add(data_view_id)
273
368
  continue
274
369
  else:
275
- raise _ex.EJobValidation(f"Missing required input: [{input_name}]")
370
+ self._error(_ex.EJobValidation(f"Missing required input: [{input_name}]"))
371
+ continue
276
372
 
277
373
  # Build a data spec using metadata from the job config
278
374
  # For now we are always loading the root part, snap 0, delta 0
279
- data_def = _util.get_job_resource(data_selector, job_config).data
280
- storage_def = _util.get_job_resource(data_def.storageId, job_config).storage
375
+ data_def = _util.get_job_resource(data_selector, self._job_config).data
376
+ storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
281
377
 
282
378
  if data_def.schemaId:
283
- schema_def = _util.get_job_resource(data_def.schemaId, job_config).schema
379
+ schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
284
380
  else:
285
381
  schema_def = data_def.schema
286
382
 
@@ -289,16 +385,16 @@ class GraphBuilder:
289
385
  data_spec = _data.DataSpec(data_item, data_def, storage_def, schema_def)
290
386
 
291
387
  # Data spec node is static, using the assembled data spec
292
- data_spec_id = NodeId.of(f"{input_name}:SPEC", job_namespace, _data.DataSpec)
388
+ data_spec_id = NodeId.of(f"{input_name}:SPEC", self._job_namespace, _data.DataSpec)
293
389
  data_spec_node = StaticValueNode(data_spec_id, data_spec, explicit_deps=explicit_deps)
294
390
 
295
391
  # Physical load of data items from disk
296
392
  # Currently one item per input, since inputs are single part/delta
297
- data_load_id = NodeId.of(f"{input_name}:LOAD", job_namespace, _data.DataItem)
393
+ data_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
298
394
  data_load_node = LoadDataNode(data_load_id, data_spec_id, explicit_deps=explicit_deps)
299
395
 
300
396
  # Input views assembled by mapping one root part to each view
301
- data_view_id = NodeId.of(input_name, job_namespace, _data.DataView)
397
+ data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
302
398
  data_view_node = DataViewNode(data_view_id, schema_def, data_load_id)
303
399
 
304
400
  nodes[data_spec_id] = data_spec_node
@@ -311,9 +407,8 @@ class GraphBuilder:
311
407
 
312
408
  return GraphSection(nodes, outputs=outputs, must_run=must_run)
313
409
 
314
- @classmethod
315
410
  def build_job_outputs(
316
- cls, job_config: config.JobConfig, job_namespace: NodeNamespace,
411
+ self,
317
412
  required_outputs: tp.Dict[str, meta.ModelOutputSchema],
318
413
  supplied_outputs: tp.Dict[str, meta.TagSelector],
319
414
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
@@ -329,25 +424,27 @@ class GraphBuilder:
329
424
  if data_selector is None:
330
425
  if output_schema.optional:
331
426
  optional_info = "(configuration is required for all optional outputs, in case they are produced)"
332
- raise _ex.EJobValidation(f"Missing optional output: [{output_name}] {optional_info}")
427
+ self._error(_ex.EJobValidation(f"Missing optional output: [{output_name}] {optional_info}"))
428
+ continue
333
429
  else:
334
- raise _ex.EJobValidation(f"Missing required output: [{output_name}]")
430
+ self._error(_ex.EJobValidation(f"Missing required output: [{output_name}]"))
431
+ continue
335
432
 
336
433
  # Output data view must already exist in the namespace
337
- data_view_id = NodeId.of(output_name, job_namespace, _data.DataView)
338
- data_spec_id = NodeId.of(f"{output_name}:SPEC", job_namespace, _data.DataSpec)
434
+ data_view_id = NodeId.of(output_name, self._job_namespace, _data.DataView)
435
+ data_spec_id = NodeId.of(f"{output_name}:SPEC", self._job_namespace, _data.DataSpec)
339
436
 
340
- data_obj = _util.get_job_resource(data_selector, job_config, optional=True)
437
+ data_obj = _util.get_job_resource(data_selector, self._job_config, optional=True)
341
438
 
342
439
  if data_obj is not None:
343
440
 
344
441
  # If data def for the output has been built in advance, use a static data spec
345
442
 
346
443
  data_def = data_obj.data
347
- storage_def = _util.get_job_resource(data_def.storageId, job_config).storage
444
+ storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
348
445
 
349
446
  if data_def.schemaId:
350
- schema_def = _util.get_job_resource(data_def.schemaId, job_config).schema
447
+ schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
351
448
  else:
352
449
  schema_def = data_def.schema
353
450
 
@@ -366,28 +463,28 @@ class GraphBuilder:
366
463
 # Dynamic data def will always use an embedded schema (there is no ID for an external schema)
367
464
 
368
465
  data_key = output_name + ":DATA"
369
- data_id = job_config.resultMapping[data_key]
466
+ data_id = self._job_config.resultMapping[data_key]
370
467
  storage_key = output_name + ":STORAGE"
371
- storage_id = job_config.resultMapping[storage_key]
468
+ storage_id = self._job_config.resultMapping[storage_key]
372
469
 
373
470
  data_spec_node = DynamicDataSpecNode(
374
- data_spec_id, data_view_id,
375
- data_id, storage_id,
376
- prior_data_spec=None,
377
- explicit_deps=explicit_deps)
471
+ data_spec_id, data_view_id,
472
+ data_id, storage_id,
473
+ prior_data_spec=None,
474
+ explicit_deps=explicit_deps)
378
475
 
379
476
  output_data_key = _util.object_key(data_id)
380
477
  output_storage_key = _util.object_key(storage_id)
381
478
 
382
479
  # Map one data item from each view, since outputs are single part/delta
383
- data_item_id = NodeId(f"{output_name}:ITEM", job_namespace, _data.DataItem)
480
+ data_item_id = NodeId(f"{output_name}:ITEM", self._job_namespace, _data.DataItem)
384
481
  data_item_node = DataItemNode(data_item_id, data_view_id)
385
482
 
386
483
  # Create a physical save operation for the data item
387
- data_save_id = NodeId.of(f"{output_name}:SAVE", job_namespace, None)
484
+ data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, None)
388
485
  data_save_node = SaveDataNode(data_save_id, data_spec_id, data_item_id)
389
486
 
390
- data_result_id = NodeId.of(f"{output_name}:RESULT", job_namespace, ObjectBundle)
487
+ data_result_id = NodeId.of(f"{output_name}:RESULT", self._job_namespace, ObjectBundle)
391
488
  data_result_node = DataResultNode(
392
489
  data_result_id, output_name,
393
490
  data_item_id, data_spec_id, data_save_id,
@@ -406,6 +503,9 @@ class GraphBuilder:
406
503
  @classmethod
407
504
  def build_runtime_outputs(cls, output_names: tp.List[str], job_namespace: NodeNamespace):
408
505
 
506
+ # This method is called dynamically during job execution
507
+ # So it cannot use stateful information like self._job_config or self._job_namespace
508
+
409
509
  # TODO: Factor out common logic with regular job outputs (including static / dynamic)
410
510
 
411
511
  nodes = {}
@@ -462,22 +562,21 @@ class GraphBuilder:
462
562
 
463
563
  return GraphSection(nodes, inputs=inputs, outputs={runtime_outputs_id})
464
564
 
465
- @classmethod
466
565
  def build_job_results(
467
- cls, job_config: cfg.JobConfig, job_namespace: NodeNamespace, result_spec: JobResultSpec,
566
+ self,
468
567
  objects: tp.Dict[str, NodeId[meta.ObjectDefinition]] = None,
469
568
  bundles: tp.List[NodeId[ObjectBundle]] = None,
470
569
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
471
570
  -> GraphSection:
472
571
 
473
- build_result_id = NodeId.of("trac_build_result", job_namespace, cfg.JobResult)
572
+ build_result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
474
573
 
475
574
  if objects is not None:
476
575
 
477
576
  results_inputs = set(objects.values())
478
577
 
479
578
  build_result_node = BuildJobResultNode(
480
- build_result_id, job_config.jobId,
579
+ build_result_id, self._job_config.jobId,
481
580
  outputs = JobOutputs(objects=objects),
482
581
  explicit_deps=explicit_deps)
483
582
 
@@ -486,17 +585,16 @@ class GraphBuilder:
486
585
  results_inputs = set(bundles)
487
586
 
488
587
  build_result_node = BuildJobResultNode(
489
- build_result_id, job_config.jobId,
588
+ build_result_id, self._job_config.jobId,
490
589
  outputs = JobOutputs(bundles=bundles),
491
590
  explicit_deps=explicit_deps)
492
591
 
493
592
  else:
494
593
  raise _ex.EUnexpected()
495
594
 
496
- save_result_id = NodeId("trac_save_result", job_namespace)
497
- save_result_node = SaveJobResultNode(save_result_id, build_result_id, result_spec)
498
-
499
- if result_spec.save_result:
595
+ if self._result_spec.save_result:
596
+ save_result_id = NodeId("trac_save_result", self._job_namespace)
597
+ save_result_node = SaveJobResultNode(save_result_id, build_result_id, self._result_spec)
500
598
  result_nodes = {build_result_id: build_result_node, save_result_id: save_result_node}
501
599
  job_result_id = save_result_id
502
600
  else:
@@ -505,10 +603,9 @@ class GraphBuilder:
505
603
 
506
604
  return GraphSection(result_nodes, inputs=results_inputs, must_run=[job_result_id])
507
605
 
508
- @classmethod
509
606
  def build_model_or_flow_with_context(
510
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
511
- model_or_flow_name: str, model_or_flow: meta.ObjectDefinition,
607
+ self, namespace: NodeNamespace, model_or_flow_name: str,
608
+ job_def: meta.JobDefinition, model_or_flow: meta.ObjectDefinition,
512
609
  input_mapping: tp.Dict[str, NodeId], output_mapping: tp.Dict[str, NodeId],
513
610
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
514
611
  -> GraphSection:
@@ -521,44 +618,45 @@ class GraphBuilder:
521
618
  # Execute in the sub-context by doing PUSH, EXEC, POP
522
619
  # Note that POP node must be in the sub namespace too
523
620
 
524
- push_section = cls.build_context_push(
621
+ push_section = self.build_context_push(
525
622
  sub_namespace, input_mapping,
526
623
  explicit_deps)
527
624
 
528
- exec_section = cls.build_model_or_flow(
529
- job_config, sub_namespace, model_or_flow,
625
+ exec_section = self.build_model_or_flow(
626
+ sub_namespace, job_def, model_or_flow,
530
627
  explicit_deps=push_section.must_run)
531
628
 
532
- pop_section = cls.build_context_pop(
629
+ pop_section = self.build_context_pop(
533
630
  sub_namespace, output_mapping,
534
631
  explicit_deps=exec_section.must_run)
535
632
 
536
- return cls._join_sections(push_section, exec_section, pop_section)
633
+ return self._join_sections(push_section, exec_section, pop_section)
537
634
 
538
- @classmethod
539
635
  def build_model_or_flow(
540
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
636
+ self, namespace: NodeNamespace,
637
+ job_def: meta.JobDefinition,
541
638
  model_or_flow: meta.ObjectDefinition,
542
639
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
543
640
  -> GraphSection:
544
641
 
545
642
  if model_or_flow.objectType == meta.ObjectType.MODEL:
546
- return cls.build_model(job_config, namespace, model_or_flow.model, explicit_deps)
643
+ return self.build_model(namespace, job_def, model_or_flow.model, explicit_deps)
547
644
 
548
645
  elif model_or_flow.objectType == meta.ObjectType.FLOW:
549
- return cls.build_flow(job_config, namespace, model_or_flow.flow)
646
+ return self.build_flow(namespace, job_def, model_or_flow.flow)
550
647
 
551
648
  else:
552
- raise _ex.EConfigParse("Invalid job config given to the execution engine")
649
+ message = f"Invalid job config, expected model or flow, got [{model_or_flow.objectType}]"
650
+ self._error(_ex.EJobValidation(message))
553
651
 
554
- @classmethod
555
652
  def build_model(
556
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
653
+ self, namespace: NodeNamespace,
654
+ job_def: meta.JobDefinition,
557
655
  model_def: meta.ModelDefinition,
558
656
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
559
657
  -> GraphSection:
560
658
 
561
- cls.check_model_type(job_config, model_def)
659
+ self.check_model_type(job_def, model_def)
562
660
 
563
661
  def param_id(node_name):
564
662
  return NodeId(node_name, namespace, meta.Value)
@@ -572,10 +670,10 @@ class GraphBuilder:
572
670
  output_ids = set(map(data_id, model_def.outputs))
573
671
 
574
672
  # Set up storage access for import / export data jobs
575
- if job_config.job.jobType == meta.JobType.IMPORT_DATA:
576
- storage_access = job_config.job.importData.storageAccess
577
- elif job_config.job.jobType == meta.JobType.EXPORT_DATA:
578
- storage_access = job_config.job.exportData.storageAccess
673
+ if job_def.jobType == meta.JobType.IMPORT_DATA:
674
+ storage_access = job_def.importData.storageAccess
675
+ elif job_def.jobType == meta.JobType.EXPORT_DATA:
676
+ storage_access = job_def.exportData.storageAccess
579
677
  else:
580
678
  storage_access = None
581
679
 
@@ -615,9 +713,9 @@ class GraphBuilder:
615
713
  # Assemble a graph to include the model and its outputs
616
714
  return GraphSection(nodes, inputs={*parameter_ids, *input_ids}, outputs=output_ids, must_run=[model_result_id])
617
715
 
618
- @classmethod
619
716
  def build_flow(
620
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
717
+ self, namespace: NodeNamespace,
718
+ job_def: meta.JobDefinition,
621
719
  flow_def: meta.FlowDefinition,
622
720
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
623
721
  -> GraphSection:
@@ -650,11 +748,11 @@ class GraphBuilder:
650
748
 
651
749
  node_name, node = reachable_nodes.popitem()
652
750
 
653
- sub_section = cls.build_flow_node(
654
- job_config, namespace, target_edges,
751
+ sub_section = self.build_flow_node(
752
+ namespace, job_def, target_edges,
655
753
  node_name, node, explicit_deps)
656
754
 
657
- graph_section = cls._join_sections(graph_section, sub_section, allow_partial_inputs=True)
755
+ graph_section = self._join_sections(graph_section, sub_section, allow_partial_inputs=True)
658
756
 
659
757
  if node.nodeType != meta.FlowNodeType.OUTPUT_NODE:
660
758
 
@@ -674,20 +772,18 @@ class GraphBuilder:
674
772
  missing_targets = [edge.target for node in remaining_edges_by_target.values() for edge in node]
675
773
  missing_target_names = [f"{t.node}.{t.socket}" if t.socket else t.node for t in missing_targets]
676
774
  missing_nodes = list(map(lambda n: NodeId(n, namespace), missing_target_names))
677
- cls._invalid_graph_error(missing_nodes)
775
+ self._invalid_graph_error(missing_nodes)
678
776
 
679
777
  return graph_section
680
778
 
681
- @classmethod
682
779
  def build_flow_node(
683
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
780
+ self, namespace: NodeNamespace,
781
+ job_def: meta.JobDefinition,
684
782
  target_edges: tp.Dict[meta.FlowSocket, meta.FlowEdge],
685
783
  node_name: str, node: meta.FlowNode,
686
784
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
687
785
  -> GraphSection:
688
786
 
689
- flow_job = job_config.job.runFlow
690
-
691
787
  def socket_key(socket):
692
788
  return f"{socket.node}.{socket.socket}" if socket.socket else socket.node
693
789
 
@@ -700,7 +796,7 @@ class GraphBuilder:
700
796
  edge = target_edges.get(socket)
701
797
  # Report missing edges as a job consistency error (this might happen sometimes in dev mode)
702
798
  if edge is None:
703
- raise _ex.EJobValidation(f"Inconsistent flow: Socket [{socket}] is not connected")
799
+ self._error(_ex.EJobValidation(f"Inconsistent flow: Socket [{socket}] is not connected"))
704
800
  return socket_id(edge.source.node, edge.source.socket, result_type)
705
801
 
706
802
  if node.nodeType == meta.FlowNodeType.PARAMETER_NODE:
@@ -723,27 +819,27 @@ class GraphBuilder:
723
819
  push_mapping = {**input_mapping, **param_mapping}
724
820
  pop_mapping = output_mapping
725
821
 
726
- model_selector = flow_job.models.get(node_name)
727
- model_obj = _util.get_job_resource(model_selector, job_config)
822
+ model_selector = job_def.runFlow.models.get(node_name)
823
+ model_obj = _util.get_job_resource(model_selector, self._job_config)
728
824
 
729
825
  # Missing models in the job config is a job consistency error
730
826
  if model_obj is None or model_obj.objectType != meta.ObjectType.MODEL:
731
- raise _ex.EJobValidation(f"No model was provided for flow node [{node_name}]")
827
+ self._error(_ex.EJobValidation(f"No model was provided for flow node [{node_name}]"))
732
828
 
733
829
  # Explicit check for model compatibility - report an error now, do not try build_model()
734
- cls.check_model_compatibility(model_selector, model_obj.model, node_name, node)
735
- cls.check_model_type(job_config, model_obj.model)
830
+ self.check_model_compatibility(model_selector, model_obj.model, node_name, node)
831
+ self.check_model_type(job_def, model_obj.model)
736
832
 
737
- return cls.build_model_or_flow_with_context(
738
- job_config, namespace, node_name, model_obj,
739
- push_mapping, pop_mapping, explicit_deps)
833
+ return self.build_model_or_flow_with_context(
834
+ namespace, node_name,
835
+ job_def, model_obj,
836
+ push_mapping, pop_mapping,
837
+ explicit_deps)
740
838
 
741
- # Missing / invalid node type - should be caught in static validation
742
- raise _ex.ETracInternal(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]")
839
+ self._error(_ex.EJobValidation(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]"))
743
840
 
744
- @classmethod
745
841
  def check_model_compatibility(
746
- cls, model_selector: meta.TagSelector,
842
+ self, model_selector: meta.TagSelector,
747
843
  model_def: meta.ModelDefinition, node_name: str, flow_node: meta.FlowNode):
748
844
 
749
845
  model_params = list(sorted(model_def.parameters.keys()))
@@ -756,22 +852,21 @@ class GraphBuilder:
756
852
 
757
853
  if model_params != node_params or model_inputs != node_inputs or model_outputs != node_outputs:
758
854
  model_key = _util.object_key(model_selector)
759
- raise _ex.EJobValidation(f"Incompatible model for flow node [{node_name}] (Model: [{model_key}])")
855
+ self._error(_ex.EJobValidation(f"Incompatible model for flow node [{node_name}] (Model: [{model_key}])"))
760
856
 
761
- @classmethod
762
- def check_model_type(cls, job_config: config.JobConfig, model_def: meta.ModelDefinition):
857
+ def check_model_type(self, job_def: meta.JobDefinition, model_def: meta.ModelDefinition):
763
858
 
764
- if job_config.job.jobType == meta.JobType.IMPORT_DATA:
859
+ if job_def.jobType == meta.JobType.IMPORT_DATA:
765
860
  allowed_model_types = [meta.ModelType.DATA_IMPORT_MODEL]
766
- elif job_config.job.jobType == meta.JobType.EXPORT_DATA:
861
+ elif job_def.jobType == meta.JobType.EXPORT_DATA:
767
862
  allowed_model_types = [meta.ModelType.DATA_EXPORT_MODEL]
768
863
  else:
769
864
  allowed_model_types = [meta.ModelType.STANDARD_MODEL]
770
865
 
771
866
  if model_def.modelType not in allowed_model_types:
772
- job_type = job_config.job.jobType.name
867
+ job_type = job_def.jobType.name
773
868
  model_type = model_def.modelType.name
774
- raise _ex.EJobValidation(f"Job type [{job_type}] cannot use model type [{model_type}]")
869
+ self._error(_ex.EJobValidation(f"Job type [{job_type}] cannot use model type [{model_type}]"))
775
870
 
776
871
  @staticmethod
777
872
  def build_context_push(
@@ -833,8 +928,7 @@ class GraphBuilder:
833
928
  outputs={*pop_mapping.values()},
834
929
  must_run=[pop_id])
835
930
 
836
- @classmethod
837
- def _join_sections(cls, *sections: GraphSection, allow_partial_inputs: bool = False):
931
+ def _join_sections(self, *sections: GraphSection, allow_partial_inputs: bool = False):
838
932
 
839
933
  n_sections = len(sections)
840
934
  first_section = sections[0]
@@ -856,7 +950,7 @@ class GraphBuilder:
856
950
  if allow_partial_inputs:
857
951
  inputs.update(requirements_not_met)
858
952
  else:
859
- cls._invalid_graph_error(requirements_not_met)
953
+ self._invalid_graph_error(requirements_not_met)
860
954
 
861
955
  nodes.update(current_section.nodes)
862
956
 
@@ -865,13 +959,12 @@ class GraphBuilder:
865
959
 
866
960
  return GraphSection(nodes, inputs, last_section.outputs, must_run)
867
961
 
868
- @classmethod
869
- def _invalid_graph_error(cls, missing_dependencies: tp.Iterable[NodeId]):
962
+ def _invalid_graph_error(self, missing_dependencies: tp.Iterable[NodeId]):
870
963
 
871
- missing_ids = ", ".join(map(cls._missing_item_display_name, missing_dependencies))
872
- message = f"Invalid job config: The execution graph has unsatisfied dependencies: [{missing_ids}]"
964
+ missing_ids = ", ".join(map(self._missing_item_display_name, missing_dependencies))
965
+ message = f"The execution graph has unsatisfied dependencies: [{missing_ids}]"
873
966
 
874
- raise _ex.EJobValidation(message)
967
+ self._error(_ex.EJobValidation(message))
875
968
 
876
969
  @classmethod
877
970
  def _missing_item_display_name(cls, node_id: NodeId):
@@ -886,3 +979,7 @@ class GraphBuilder:
886
979
  return node_id.name
887
980
  else:
888
981
  return f"{node_id.name} / {', '.join(components[:-1])}"
982
+
983
+ def _error(self, error: Exception):
984
+
985
+ self._errors.append(error)