tracdap-runtime 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. tracdap/rt/_exec/context.py +556 -36
  2. tracdap/rt/_exec/dev_mode.py +320 -198
  3. tracdap/rt/_exec/engine.py +331 -62
  4. tracdap/rt/_exec/functions.py +151 -22
  5. tracdap/rt/_exec/graph.py +47 -13
  6. tracdap/rt/_exec/graph_builder.py +383 -175
  7. tracdap/rt/_exec/runtime.py +7 -5
  8. tracdap/rt/_impl/config_parser.py +11 -4
  9. tracdap/rt/_impl/data.py +329 -152
  10. tracdap/rt/_impl/ext/__init__.py +13 -0
  11. tracdap/rt/_impl/ext/sql.py +116 -0
  12. tracdap/rt/_impl/ext/storage.py +57 -0
  13. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +82 -30
  14. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +155 -2
  15. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +12 -10
  16. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -2
  17. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +29 -0
  18. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +16 -0
  19. tracdap/rt/_impl/models.py +8 -0
  20. tracdap/rt/_impl/static_api.py +29 -0
  21. tracdap/rt/_impl/storage.py +39 -27
  22. tracdap/rt/_impl/util.py +10 -0
  23. tracdap/rt/_impl/validation.py +140 -18
  24. tracdap/rt/_plugins/repo_git.py +1 -1
  25. tracdap/rt/_plugins/storage_sql.py +417 -0
  26. tracdap/rt/_plugins/storage_sql_dialects.py +117 -0
  27. tracdap/rt/_version.py +1 -1
  28. tracdap/rt/api/experimental.py +267 -0
  29. tracdap/rt/api/hook.py +14 -0
  30. tracdap/rt/api/model_api.py +48 -6
  31. tracdap/rt/config/__init__.py +2 -2
  32. tracdap/rt/config/common.py +6 -0
  33. tracdap/rt/metadata/__init__.py +29 -20
  34. tracdap/rt/metadata/job.py +99 -0
  35. tracdap/rt/metadata/model.py +18 -0
  36. tracdap/rt/metadata/resource.py +24 -0
  37. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/METADATA +5 -1
  38. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/RECORD +41 -32
  39. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/WHEEL +1 -1
  40. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/LICENSE +0 -0
  41. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/top_level.txt +0 -0
@@ -12,8 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from __future__ import annotations
16
-
17
15
  import tracdap.rt.config as config
18
16
  import tracdap.rt.exceptions as _ex
19
17
  import tracdap.rt._impl.data as _data # noqa
@@ -24,72 +22,111 @@ from .graph import *
24
22
 
25
23
  class GraphBuilder:
26
24
 
27
- __JOB_BUILD_FUNC = tp.Callable[
28
- [config.JobConfig, JobResultSpec, NodeNamespace, NodeId],
29
- GraphSection]
25
+ __JOB_DETAILS = tp.TypeVar(
26
+ "__JOB_DETAILS",
27
+ meta.RunModelJob,
28
+ meta.RunFlowJob,
29
+ meta.ImportModelJob,
30
+ meta.ImportDataJob,
31
+ meta.ExportDataJob)
30
32
 
31
- @classmethod
32
- def build_job(
33
- cls, job_config: config.JobConfig,
34
- result_spec: JobResultSpec) -> Graph:
33
+ __JOB_BUILD_FUNC = tp.Callable[[meta.JobDefinition, NodeId], GraphSection]
35
34
 
36
- if job_config.job.jobType == meta.JobType.IMPORT_MODEL:
37
- return cls.build_standard_job(job_config, result_spec, cls.build_import_model_job)
35
+ def __init__(self, job_config: config.JobConfig, result_spec: JobResultSpec):
38
36
 
39
- if job_config.job.jobType == meta.JobType.RUN_MODEL:
40
- return cls.build_standard_job(job_config, result_spec, cls.build_run_model_job)
37
+ self._job_config = job_config
38
+ self._result_spec = result_spec
41
39
 
42
- if job_config.job.jobType == meta.JobType.RUN_FLOW:
43
- return cls.build_standard_job(job_config, result_spec, cls.build_run_flow_job)
40
+ self._job_key = _util.object_key(job_config.jobId)
41
+ self._job_namespace = NodeNamespace(self._job_key)
44
42
 
45
- raise _ex.EConfigParse(f"Job type [{job_config.job.jobType}] is not supported yet")
43
+ self._errors = []
46
44
 
47
- @classmethod
48
- def build_standard_job(
49
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
50
- build_func: __JOB_BUILD_FUNC):
45
+ def _child_builder(self, job_id: meta.TagHeader) -> "GraphBuilder":
51
46
 
52
- # Set up the job context
47
+ builder = GraphBuilder(self._job_config, JobResultSpec(save_result=False))
48
+ builder._job_key = _util.object_key(job_id)
49
+ builder._job_namespace = NodeNamespace(builder._job_key)
50
+
51
+ return builder
52
+
53
+ def build_job(self, job_def: meta.JobDefinition,) -> Graph:
54
+
55
+ try:
56
+
57
+ if job_def.jobType == meta.JobType.IMPORT_MODEL:
58
+ return self.build_standard_job(job_def, self.build_import_model_job)
59
+
60
+ if job_def.jobType == meta.JobType.RUN_MODEL:
61
+ return self.build_standard_job(job_def, self.build_run_model_job)
62
+
63
+ if job_def.jobType == meta.JobType.RUN_FLOW:
64
+ return self.build_standard_job(job_def, self.build_run_flow_job)
65
+
66
+ if job_def.jobType in [meta.JobType.IMPORT_DATA, meta.JobType.EXPORT_DATA]:
67
+ return self.build_standard_job(job_def, self.build_import_export_data_job)
68
+
69
+ if job_def.jobType == meta.JobType.JOB_GROUP:
70
+ return self.build_standard_job(job_def, self.build_job_group)
71
+
72
+ self._error(_ex.EJobValidation(f"Job type [{job_def.jobType.name}] is not supported yet"))
73
+
74
+ except Exception as e:
75
+
76
+ # If there are recorded errors, assume unhandled exceptions are a result of those
77
+ # Only report the recorded errors, to reduce noise
78
+ if any(self._errors):
79
+ pass
80
+
81
+ # If no errors are recorded, an exception here would be a bug
82
+ raise _ex.ETracInternal(f"Unexpected error preparing the job execution graph") from e
83
+
84
+ finally:
53
85
 
54
- job_key = _util.object_key(job_config.jobId)
55
- job_namespace = NodeNamespace(job_key)
86
+ if any(self._errors):
56
87
 
57
- push_id = NodeId("trac_job_push", job_namespace, Bundle[tp.Any])
58
- push_node = ContextPushNode(push_id, job_namespace)
88
+ if len(self._errors) == 1:
89
+ raise self._errors[0]
90
+ else:
91
+ err_text = "\n".join(map(str, self._errors))
92
+ raise _ex.EJobValidation("Invalid job configuration\n" + err_text)
93
+
94
+ def build_standard_job(self, job_def: meta.JobDefinition, build_func: __JOB_BUILD_FUNC):
95
+
96
+ # Set up the job context
97
+
98
+ push_id = NodeId("trac_job_push", self._job_namespace, Bundle[tp.Any])
99
+ push_node = ContextPushNode(push_id, self._job_namespace)
59
100
  push_section = GraphSection({push_id: push_node}, must_run=[push_id])
60
101
 
61
102
  # Build the execution graphs for the main job and results recording
62
103
 
63
- main_section = build_func(job_config, result_spec, job_namespace, push_id)
64
- main_result_id = NodeId.of("trac_build_result", job_namespace, config.JobResult)
104
+ main_section = build_func(job_def, push_id)
105
+ main_result_id = NodeId.of("trac_job_result", self._job_namespace, config.JobResult)
65
106
 
66
107
  # Clean up the job context
67
108
 
68
- global_result_id = NodeId.of(job_key, NodeNamespace.root(), config.JobResult)
109
+ global_result_id = NodeId.of(self._job_key, NodeNamespace.root(), config.JobResult)
69
110
 
70
- pop_id = NodeId("trac_job_pop", job_namespace, Bundle[tp.Any])
111
+ pop_id = NodeId("trac_job_pop", self._job_namespace, Bundle[tp.Any])
71
112
  pop_mapping = {main_result_id: global_result_id}
72
113
 
73
114
  pop_node = ContextPopNode(
74
- pop_id, job_namespace, pop_mapping,
115
+ pop_id, self._job_namespace, pop_mapping,
75
116
  explicit_deps=main_section.must_run,
76
117
  bundle=NodeNamespace.root())
77
118
 
78
- global_result_node = BundleItemNode(global_result_id, pop_id, job_key)
119
+ global_result_node = BundleItemNode(global_result_id, pop_id, self._job_key)
79
120
 
80
121
  pop_section = GraphSection({
81
122
  pop_id: pop_node,
82
123
  global_result_id: global_result_node})
83
124
 
84
- job = cls._join_sections(push_section, main_section, pop_section)
125
+ job = self._join_sections(push_section, main_section, pop_section)
85
126
 
86
127
  return Graph(job.nodes, global_result_id)
87
128
 
88
- @classmethod
89
- def build_import_model_job(
90
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
91
- job_namespace: NodeNamespace, job_push_id: NodeId) \
92
- -> GraphSection:
129
+ def build_import_model_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
93
130
 
94
131
  # Main section: run the model import
95
132
 
@@ -97,60 +134,142 @@ class GraphBuilder:
97
134
  new_model_id = _util.new_object_id(meta.ObjectType.MODEL)
98
135
  new_model_key = _util.object_key(new_model_id)
99
136
 
100
- model_scope = _util.object_key(job_config.jobId)
101
- import_details = job_config.job.importModel
137
+ model_scope = self._job_key
138
+ import_details = job_def.importModel
102
139
 
103
- import_id = NodeId.of("trac_import_model", job_namespace, meta.ObjectDefinition)
140
+ import_id = NodeId.of("trac_import_model", self._job_namespace, meta.ObjectDefinition)
104
141
  import_node = ImportModelNode(import_id, model_scope, import_details, explicit_deps=[job_push_id])
105
142
 
106
143
  main_section = GraphSection(nodes={import_id: import_node})
107
144
 
108
145
  # Build job-level metadata outputs
109
146
 
110
- result_section = cls.build_job_results(
111
- job_config, job_namespace, result_spec,
147
+ result_section = self.build_job_results(
112
148
  objects={new_model_key: import_id},
113
149
  explicit_deps=[job_push_id, *main_section.must_run])
114
150
 
115
- return cls._join_sections(main_section, result_section)
151
+ return self._join_sections(main_section, result_section)
116
152
 
117
- @classmethod
118
- def build_run_model_job(
119
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
120
- job_namespace: NodeNamespace, job_push_id: NodeId) \
121
- -> GraphSection:
153
+ def build_import_export_data_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
122
154
 
123
- target_selector = job_config.job.runModel.model
124
- target_obj = _util.get_job_resource(target_selector, job_config)
155
+ # TODO: These are processed as regular calculation jobs for now
156
+ # That might be ok, but is worth reviewing
157
+
158
+ if job_def.jobType == meta.JobType.IMPORT_DATA:
159
+ job_details = job_def.importData
160
+ else:
161
+ job_details = job_def.exportData
162
+
163
+ target_selector = job_details.model
164
+ target_obj = _util.get_job_resource(target_selector, self._job_config)
125
165
  target_def = target_obj.model
126
- job_def = job_config.job.runModel
127
166
 
128
- return cls.build_calculation_job(
129
- job_config, result_spec, job_namespace, job_push_id,
130
- target_selector, target_def, job_def)
167
+ return self.build_calculation_job(
168
+ job_def, job_push_id,
169
+ target_selector, target_def,
170
+ job_details)
131
171
 
132
- @classmethod
133
- def build_run_flow_job(
134
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
135
- job_namespace: NodeNamespace, job_push_id: NodeId) \
136
- -> GraphSection:
172
+ def build_run_model_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
173
+
174
+ job_details = job_def.runModel
175
+
176
+ target_selector = job_details.model
177
+ target_obj = _util.get_job_resource(target_selector, self._job_config)
178
+ target_def = target_obj.model
137
179
 
138
- target_selector = job_config.job.runFlow.flow
139
- target_obj = _util.get_job_resource(target_selector, job_config)
180
+ return self.build_calculation_job(
181
+ job_def, job_push_id,
182
+ target_selector, target_def,
183
+ job_details)
184
+
185
+ def build_run_flow_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
186
+
187
+ job_details = job_def.runFlow
188
+
189
+ target_selector = job_details.flow
190
+ target_obj = _util.get_job_resource(target_selector, self._job_config)
140
191
  target_def = target_obj.flow
141
- job_def = job_config.job.runFlow
142
192
 
143
- return cls.build_calculation_job(
144
- job_config, result_spec, job_namespace, job_push_id,
145
- target_selector, target_def, job_def)
193
+ return self.build_calculation_job(
194
+ job_def, job_push_id,
195
+ target_selector, target_def,
196
+ job_details)
197
+
198
+ def build_job_group(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
199
+
200
+ job_group = job_def.jobGroup
201
+
202
+ if job_group.jobGroupType == meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
203
+ return self.build_sequential_job_group(job_group, job_push_id)
204
+
205
+ if job_group.jobGroupType == meta.JobGroupType.PARALLEL_JOB_GROUP:
206
+ return self.build_parallel_job_group(job_group, job_push_id)
207
+
208
+ else:
209
+ self._error(_ex.EJobValidation(f"Job group type [{job_group.jobGroupType.name}] is not supported yet"))
210
+ return GraphSection(dict(), inputs={job_push_id})
211
+
212
+ def build_sequential_job_group(self, job_group: meta.JobGroup, job_push_id: NodeId) -> GraphSection:
213
+
214
+ nodes = dict()
215
+ prior_id = job_push_id
216
+
217
+ for child_def in job_group.sequential.jobs:
218
+
219
+ child_node = self.build_child_job(child_def, explicit_deps=[prior_id])
220
+ nodes[child_node.id] = child_node
221
+
222
+ prior_id = child_node.id
223
+
224
+ # No real results from job groups yet (they cannot be executed from the platform)
225
+ job_result = cfg.JobResult()
226
+ result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
227
+ result_node = StaticValueNode(result_id, job_result, explicit_deps=[prior_id])
228
+ nodes[result_id] = result_node
229
+
230
+ return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})
231
+
232
+ def build_parallel_job_group(self, job_group: meta.JobGroup, job_push_id: NodeId) -> GraphSection:
233
+
234
+ nodes = dict()
235
+ parallel_ids = [job_push_id]
236
+
237
+ for child_def in job_group.parallel.jobs:
238
+
239
+ child_node = self.build_child_job(child_def, explicit_deps=[job_push_id])
240
+ nodes[child_node.id] = child_node
241
+
242
+ parallel_ids.append(child_node.id)
243
+
244
+ # No real results from job groups yet (they cannot be executed from the platform)
245
+ job_result = cfg.JobResult()
246
+ result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
247
+ result_node = StaticValueNode(result_id, job_result, explicit_deps=parallel_ids)
248
+ nodes[result_id] = result_node
249
+
250
+ return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})
251
+
252
+ def build_child_job(self, child_job_def: meta.JobDefinition, explicit_deps) -> Node[config.JobResult]:
253
+
254
+ child_job_id = _util.new_object_id(meta.ObjectType.JOB)
255
+
256
+ child_builder = self._child_builder(child_job_id)
257
+ child_graph = child_builder.build_job(child_job_def)
258
+
259
+ child_node_name = _util.object_key(child_job_id)
260
+ child_node_id = NodeId.of(child_node_name, self._job_namespace, cfg.JobResult)
261
+
262
+ child_node = ChildJobNode(
263
+ child_node_id, child_job_id, child_job_def,
264
+ child_graph, explicit_deps)
265
+
266
+ return child_node
146
267
 
147
- @classmethod
148
268
  def build_calculation_job(
149
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
150
- job_namespace: NodeNamespace, job_push_id: NodeId,
269
+ self, job_def: meta.JobDefinition, job_push_id: NodeId,
151
270
  target_selector: meta.TagSelector,
152
271
  target_def: tp.Union[meta.ModelDefinition, meta.FlowDefinition],
153
- job_def: tp.Union[meta.RunModelJob, meta.RunFlowJob]) \
272
+ job_details: __JOB_DETAILS) \
154
273
  -> GraphSection:
155
274
 
156
275
  # The main execution graph can run directly in the job context, no need to do a context push
@@ -162,29 +281,30 @@ class GraphBuilder:
162
281
  required_inputs = target_def.inputs
163
282
  required_outputs = target_def.outputs
164
283
 
165
- provided_params = job_def.parameters
166
- provided_inputs = job_def.inputs
167
- provided_outputs = job_def.outputs
284
+ provided_params = job_details.parameters
285
+ provided_inputs = job_details.inputs
286
+ provided_outputs = job_details.outputs
168
287
 
169
- params_section = cls.build_job_parameters(
170
- job_namespace, required_params, provided_params,
288
+ params_section = self.build_job_parameters(
289
+ required_params, provided_params,
171
290
  explicit_deps=[job_push_id])
172
291
 
173
- input_section = cls.build_job_inputs(
174
- job_config, job_namespace, required_inputs, provided_inputs,
292
+ input_section = self.build_job_inputs(
293
+ required_inputs, provided_inputs,
175
294
  explicit_deps=[job_push_id])
176
295
 
177
- exec_obj = _util.get_job_resource(target_selector, job_config)
296
+ exec_namespace = self._job_namespace
297
+ exec_obj = _util.get_job_resource(target_selector, self._job_config)
178
298
 
179
- exec_section = cls.build_model_or_flow(
180
- job_config, job_namespace, exec_obj,
299
+ exec_section = self.build_model_or_flow(
300
+ exec_namespace, job_def, exec_obj,
181
301
  explicit_deps=[job_push_id])
182
302
 
183
- output_section = cls.build_job_outputs(
184
- job_config, job_namespace, required_outputs, provided_outputs,
303
+ output_section = self.build_job_outputs(
304
+ required_outputs, provided_outputs,
185
305
  explicit_deps=[job_push_id])
186
306
 
187
- main_section = cls._join_sections(params_section, input_section, exec_section, output_section)
307
+ main_section = self._join_sections(params_section, input_section, exec_section, output_section)
188
308
 
189
309
  # Build job-level metadata outputs
190
310
 
@@ -192,16 +312,14 @@ class GraphBuilder:
192
312
  nid for nid, n in main_section.nodes.items()
193
313
  if isinstance(n, DataResultNode))
194
314
 
195
- result_section = cls.build_job_results(
196
- job_config, job_namespace,
197
- result_spec, bundles=data_result_ids,
315
+ result_section = self.build_job_results(
316
+ bundles=data_result_ids,
198
317
  explicit_deps=[job_push_id, *main_section.must_run])
199
318
 
200
- return cls._join_sections(main_section, result_section)
319
+ return self._join_sections(main_section, result_section)
201
320
 
202
- @classmethod
203
321
  def build_job_parameters(
204
- cls, job_namespace: NodeNamespace,
322
+ self,
205
323
  required_params: tp.Dict[str, meta.ModelParameter],
206
324
  supplied_params: tp.Dict[str, meta.Value],
207
325
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
@@ -217,18 +335,18 @@ class GraphBuilder:
217
335
  if param_schema.defaultValue is not None:
218
336
  param_def = param_schema.defaultValue
219
337
  else:
220
- raise _ex.EJobValidation(f"Missing required parameter: [{param_name}]")
338
+ self._error(_ex.EJobValidation(f"Missing required parameter: [{param_name}]"))
339
+ continue
221
340
 
222
- param_id = NodeId(param_name, job_namespace, meta.Value)
341
+ param_id = NodeId(param_name, self._job_namespace, meta.Value)
223
342
  param_node = StaticValueNode(param_id, param_def, explicit_deps=explicit_deps)
224
343
 
225
344
  nodes[param_id] = param_node
226
345
 
227
346
  return GraphSection(nodes, outputs=set(nodes.keys()), must_run=list(nodes.keys()))
228
347
 
229
- @classmethod
230
348
  def build_job_inputs(
231
- cls, job_config: config.JobConfig, job_namespace: NodeNamespace,
349
+ self,
232
350
  required_inputs: tp.Dict[str, meta.ModelInputSchema],
233
351
  supplied_inputs: tp.Dict[str, meta.TagSelector],
234
352
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
@@ -244,20 +362,21 @@ class GraphBuilder:
244
362
 
245
363
  if data_selector is None:
246
364
  if input_schema.optional:
247
- data_view_id = NodeId.of(input_name, job_namespace, _data.DataView)
365
+ data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
248
366
  nodes[data_view_id] = StaticValueNode(data_view_id, _data.DataView.create_empty())
249
367
  outputs.add(data_view_id)
250
368
  continue
251
369
  else:
252
- raise _ex.EJobValidation(f"Missing required input: [{input_name}]")
370
+ self._error(_ex.EJobValidation(f"Missing required input: [{input_name}]"))
371
+ continue
253
372
 
254
373
  # Build a data spec using metadata from the job config
255
374
  # For now we are always loading the root part, snap 0, delta 0
256
- data_def = _util.get_job_resource(data_selector, job_config).data
257
- storage_def = _util.get_job_resource(data_def.storageId, job_config).storage
375
+ data_def = _util.get_job_resource(data_selector, self._job_config).data
376
+ storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
258
377
 
259
378
  if data_def.schemaId:
260
- schema_def = _util.get_job_resource(data_def.schemaId, job_config).schema
379
+ schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
261
380
  else:
262
381
  schema_def = data_def.schema
263
382
 
@@ -266,16 +385,16 @@ class GraphBuilder:
266
385
  data_spec = _data.DataSpec(data_item, data_def, storage_def, schema_def)
267
386
 
268
387
  # Data spec node is static, using the assembled data spec
269
- data_spec_id = NodeId.of(f"{input_name}:SPEC", job_namespace, _data.DataSpec)
388
+ data_spec_id = NodeId.of(f"{input_name}:SPEC", self._job_namespace, _data.DataSpec)
270
389
  data_spec_node = StaticValueNode(data_spec_id, data_spec, explicit_deps=explicit_deps)
271
390
 
272
391
  # Physical load of data items from disk
273
392
  # Currently one item per input, since inputs are single part/delta
274
- data_load_id = NodeId.of(f"{input_name}:LOAD", job_namespace, _data.DataItem)
393
+ data_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
275
394
  data_load_node = LoadDataNode(data_load_id, data_spec_id, explicit_deps=explicit_deps)
276
395
 
277
396
  # Input views assembled by mapping one root part to each view
278
- data_view_id = NodeId.of(input_name, job_namespace, _data.DataView)
397
+ data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
279
398
  data_view_node = DataViewNode(data_view_id, schema_def, data_load_id)
280
399
 
281
400
  nodes[data_spec_id] = data_spec_node
@@ -288,9 +407,8 @@ class GraphBuilder:
288
407
 
289
408
  return GraphSection(nodes, outputs=outputs, must_run=must_run)
290
409
 
291
- @classmethod
292
410
  def build_job_outputs(
293
- cls, job_config: config.JobConfig, job_namespace: NodeNamespace,
411
+ self,
294
412
  required_outputs: tp.Dict[str, meta.ModelOutputSchema],
295
413
  supplied_outputs: tp.Dict[str, meta.TagSelector],
296
414
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
@@ -306,25 +424,27 @@ class GraphBuilder:
306
424
  if data_selector is None:
307
425
  if output_schema.optional:
308
426
  optional_info = "(configuration is required for all optional outputs, in case they are produced)"
309
- raise _ex.EJobValidation(f"Missing optional output: [{output_name}] {optional_info}")
427
+ self._error(_ex.EJobValidation(f"Missing optional output: [{output_name}] {optional_info}"))
428
+ continue
310
429
  else:
311
- raise _ex.EJobValidation(f"Missing required output: [{output_name}]")
430
+ self._error(_ex.EJobValidation(f"Missing required output: [{output_name}]"))
431
+ continue
312
432
 
313
433
  # Output data view must already exist in the namespace
314
- data_view_id = NodeId.of(output_name, job_namespace, _data.DataView)
315
- data_spec_id = NodeId.of(f"{output_name}:SPEC", job_namespace, _data.DataSpec)
434
+ data_view_id = NodeId.of(output_name, self._job_namespace, _data.DataView)
435
+ data_spec_id = NodeId.of(f"{output_name}:SPEC", self._job_namespace, _data.DataSpec)
316
436
 
317
- data_obj = _util.get_job_resource(data_selector, job_config, optional=True)
437
+ data_obj = _util.get_job_resource(data_selector, self._job_config, optional=True)
318
438
 
319
439
  if data_obj is not None:
320
440
 
321
441
  # If data def for the output has been built in advance, use a static data spec
322
442
 
323
443
  data_def = data_obj.data
324
- storage_def = _util.get_job_resource(data_def.storageId, job_config).storage
444
+ storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
325
445
 
326
446
  if data_def.schemaId:
327
- schema_def = _util.get_job_resource(data_def.schemaId, job_config).schema
447
+ schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
328
448
  else:
329
449
  schema_def = data_def.schema
330
450
 
@@ -343,19 +463,74 @@ class GraphBuilder:
343
463
  # Dynamic data def will always use an embedded schema (there is no ID for an external schema)
344
464
 
345
465
  data_key = output_name + ":DATA"
346
- data_id = job_config.resultMapping[data_key]
466
+ data_id = self._job_config.resultMapping[data_key]
347
467
  storage_key = output_name + ":STORAGE"
348
- storage_id = job_config.resultMapping[storage_key]
468
+ storage_id = self._job_config.resultMapping[storage_key]
349
469
 
350
470
  data_spec_node = DynamicDataSpecNode(
351
- data_spec_id, data_view_id,
352
- data_id, storage_id,
353
- prior_data_spec=None,
354
- explicit_deps=explicit_deps)
471
+ data_spec_id, data_view_id,
472
+ data_id, storage_id,
473
+ prior_data_spec=None,
474
+ explicit_deps=explicit_deps)
355
475
 
356
476
  output_data_key = _util.object_key(data_id)
357
477
  output_storage_key = _util.object_key(storage_id)
358
478
 
479
+ # Map one data item from each view, since outputs are single part/delta
480
+ data_item_id = NodeId(f"{output_name}:ITEM", self._job_namespace, _data.DataItem)
481
+ data_item_node = DataItemNode(data_item_id, data_view_id)
482
+
483
+ # Create a physical save operation for the data item
484
+ data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, None)
485
+ data_save_node = SaveDataNode(data_save_id, data_spec_id, data_item_id)
486
+
487
+ data_result_id = NodeId.of(f"{output_name}:RESULT", self._job_namespace, ObjectBundle)
488
+ data_result_node = DataResultNode(
489
+ data_result_id, output_name,
490
+ data_item_id, data_spec_id, data_save_id,
491
+ output_data_key, output_storage_key)
492
+
493
+ nodes[data_spec_id] = data_spec_node
494
+ nodes[data_item_id] = data_item_node
495
+ nodes[data_save_id] = data_save_node
496
+ nodes[data_result_id] = data_result_node
497
+
498
+ # Job-level data view is an input to the save operation
499
+ inputs.add(data_view_id)
500
+
501
+ return GraphSection(nodes, inputs=inputs)
502
+
503
+ @classmethod
504
+ def build_runtime_outputs(cls, output_names: tp.List[str], job_namespace: NodeNamespace):
505
+
506
+ # This method is called dynamically during job execution
507
+ # So it cannot use stateful information like self._job_config or self._job_namespace
508
+
509
+ # TODO: Factor out common logic with regular job outputs (including static / dynamic)
510
+
511
+ nodes = {}
512
+ inputs = set()
513
+ outputs = list()
514
+
515
+ for output_name in output_names:
516
+
517
+ # Output data view must already exist in the namespace
518
+ data_view_id = NodeId.of(output_name, job_namespace, _data.DataView)
519
+ data_spec_id = NodeId.of(f"{output_name}:SPEC", job_namespace, _data.DataSpec)
520
+
521
+ data_key = output_name + ":DATA"
522
+ data_id = _util.new_object_id(meta.ObjectType.DATA)
523
+ storage_key = output_name + ":STORAGE"
524
+ storage_id = _util.new_object_id(meta.ObjectType.STORAGE)
525
+
526
+ data_spec_node = DynamicDataSpecNode(
527
+ data_spec_id, data_view_id,
528
+ data_id, storage_id,
529
+ prior_data_spec=None)
530
+
531
+ output_data_key = _util.object_key(data_id)
532
+ output_storage_key = _util.object_key(storage_id)
533
+
359
534
  # Map one data item from each view, since outputs are single part/delta
360
535
  data_item_id = NodeId(f"{output_name}:ITEM", job_namespace, _data.DataItem)
361
536
  data_item_node = DataItemNode(data_item_id, data_view_id)
@@ -377,42 +552,49 @@ class GraphBuilder:
377
552
 
378
553
  # Job-level data view is an input to the save operation
379
554
  inputs.add(data_view_id)
555
+ outputs.append(data_result_id)
380
556
 
381
- return GraphSection(nodes, inputs=inputs)
557
+ runtime_outputs = JobOutputs(bundles=outputs)
558
+ runtime_outputs_id = NodeId.of("trac_runtime_outputs", job_namespace, JobOutputs)
559
+ runtime_outputs_node = RuntimeOutputsNode(runtime_outputs_id, runtime_outputs)
560
+
561
+ nodes[runtime_outputs_id] = runtime_outputs_node
562
+
563
+ return GraphSection(nodes, inputs=inputs, outputs={runtime_outputs_id})
382
564
 
383
- @classmethod
384
565
  def build_job_results(
385
- cls, job_config: cfg.JobConfig, job_namespace: NodeNamespace, result_spec: JobResultSpec,
566
+ self,
386
567
  objects: tp.Dict[str, NodeId[meta.ObjectDefinition]] = None,
387
568
  bundles: tp.List[NodeId[ObjectBundle]] = None,
388
569
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
389
570
  -> GraphSection:
390
571
 
391
- build_result_id = NodeId.of("trac_build_result", job_namespace, cfg.JobResult)
572
+ build_result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
392
573
 
393
574
  if objects is not None:
394
575
 
395
576
  results_inputs = set(objects.values())
396
577
 
397
578
  build_result_node = BuildJobResultNode(
398
- build_result_id, job_config.jobId,
399
- objects=objects, explicit_deps=explicit_deps)
579
+ build_result_id, self._job_config.jobId,
580
+ outputs = JobOutputs(objects=objects),
581
+ explicit_deps=explicit_deps)
400
582
 
401
583
  elif bundles is not None:
402
584
 
403
585
  results_inputs = set(bundles)
404
586
 
405
587
  build_result_node = BuildJobResultNode(
406
- build_result_id, job_config.jobId,
407
- bundles=bundles, explicit_deps=explicit_deps)
588
+ build_result_id, self._job_config.jobId,
589
+ outputs = JobOutputs(bundles=bundles),
590
+ explicit_deps=explicit_deps)
408
591
 
409
592
  else:
410
593
  raise _ex.EUnexpected()
411
594
 
412
- save_result_id = NodeId("trac_save_result", job_namespace)
413
- save_result_node = SaveJobResultNode(save_result_id, build_result_id, result_spec)
414
-
415
- if result_spec.save_result:
595
+ if self._result_spec.save_result:
596
+ save_result_id = NodeId("trac_save_result", self._job_namespace)
597
+ save_result_node = SaveJobResultNode(save_result_id, build_result_id, self._result_spec)
416
598
  result_nodes = {build_result_id: build_result_node, save_result_id: save_result_node}
417
599
  job_result_id = save_result_id
418
600
  else:
@@ -421,10 +603,9 @@ class GraphBuilder:
421
603
 
422
604
  return GraphSection(result_nodes, inputs=results_inputs, must_run=[job_result_id])
423
605
 
424
- @classmethod
425
606
  def build_model_or_flow_with_context(
426
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
427
- model_or_flow_name: str, model_or_flow: meta.ObjectDefinition,
607
+ self, namespace: NodeNamespace, model_or_flow_name: str,
608
+ job_def: meta.JobDefinition, model_or_flow: meta.ObjectDefinition,
428
609
  input_mapping: tp.Dict[str, NodeId], output_mapping: tp.Dict[str, NodeId],
429
610
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
430
611
  -> GraphSection:
@@ -437,43 +618,46 @@ class GraphBuilder:
437
618
  # Execute in the sub-context by doing PUSH, EXEC, POP
438
619
  # Note that POP node must be in the sub namespace too
439
620
 
440
- push_section = cls.build_context_push(
621
+ push_section = self.build_context_push(
441
622
  sub_namespace, input_mapping,
442
623
  explicit_deps)
443
624
 
444
- exec_section = cls.build_model_or_flow(
445
- job_config, sub_namespace, model_or_flow,
625
+ exec_section = self.build_model_or_flow(
626
+ sub_namespace, job_def, model_or_flow,
446
627
  explicit_deps=push_section.must_run)
447
628
 
448
- pop_section = cls.build_context_pop(
629
+ pop_section = self.build_context_pop(
449
630
  sub_namespace, output_mapping,
450
631
  explicit_deps=exec_section.must_run)
451
632
 
452
- return cls._join_sections(push_section, exec_section, pop_section)
633
+ return self._join_sections(push_section, exec_section, pop_section)
453
634
 
454
- @classmethod
455
635
  def build_model_or_flow(
456
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
636
+ self, namespace: NodeNamespace,
637
+ job_def: meta.JobDefinition,
457
638
  model_or_flow: meta.ObjectDefinition,
458
639
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
459
640
  -> GraphSection:
460
641
 
461
642
  if model_or_flow.objectType == meta.ObjectType.MODEL:
462
- return cls.build_model(namespace, model_or_flow.model, explicit_deps)
643
+ return self.build_model(namespace, job_def, model_or_flow.model, explicit_deps)
463
644
 
464
645
  elif model_or_flow.objectType == meta.ObjectType.FLOW:
465
- return cls.build_flow(job_config, namespace, model_or_flow.flow)
646
+ return self.build_flow(namespace, job_def, model_or_flow.flow)
466
647
 
467
648
  else:
468
- raise _ex.EConfigParse("Invalid job config given to the execution engine")
649
+ message = f"Invalid job config, expected model or flow, got [{model_or_flow.objectType}]"
650
+ self._error(_ex.EJobValidation(message))
469
651
 
470
- @classmethod
471
652
  def build_model(
472
- cls, namespace: NodeNamespace,
653
+ self, namespace: NodeNamespace,
654
+ job_def: meta.JobDefinition,
473
655
  model_def: meta.ModelDefinition,
474
656
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
475
657
  -> GraphSection:
476
658
 
659
+ self.check_model_type(job_def, model_def)
660
+
477
661
  def param_id(node_name):
478
662
  return NodeId(node_name, namespace, meta.Value)
479
663
 
@@ -485,6 +669,14 @@ class GraphBuilder:
485
669
  input_ids = set(map(data_id, model_def.inputs))
486
670
  output_ids = set(map(data_id, model_def.outputs))
487
671
 
672
+ # Set up storage access for import / export data jobs
673
+ if job_def.jobType == meta.JobType.IMPORT_DATA:
674
+ storage_access = job_def.importData.storageAccess
675
+ elif job_def.jobType == meta.JobType.EXPORT_DATA:
676
+ storage_access = job_def.exportData.storageAccess
677
+ else:
678
+ storage_access = None
679
+
488
680
  # Create the model node
489
681
  # Always add the prior graph root ID as a dependency
490
682
  # This is to ensure dependencies are still pulled in for models with no inputs!
@@ -500,7 +692,8 @@ class GraphBuilder:
500
692
  model_node = RunModelNode(
501
693
  model_id, model_scope, model_def,
502
694
  frozenset(parameter_ids), frozenset(input_ids),
503
- explicit_deps=explicit_deps, bundle=model_id.namespace)
695
+ explicit_deps=explicit_deps, bundle=model_id.namespace,
696
+ storage_access=storage_access)
504
697
 
505
698
  model_result_id = NodeId(f"{model_name}:RESULT", namespace)
506
699
  model_result_node = RunModelResultNode(model_result_id, model_id)
@@ -520,9 +713,9 @@ class GraphBuilder:
520
713
  # Assemble a graph to include the model and its outputs
521
714
  return GraphSection(nodes, inputs={*parameter_ids, *input_ids}, outputs=output_ids, must_run=[model_result_id])
522
715
 
523
- @classmethod
524
716
  def build_flow(
525
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
717
+ self, namespace: NodeNamespace,
718
+ job_def: meta.JobDefinition,
526
719
  flow_def: meta.FlowDefinition,
527
720
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
528
721
  -> GraphSection:
@@ -555,11 +748,11 @@ class GraphBuilder:
555
748
 
556
749
  node_name, node = reachable_nodes.popitem()
557
750
 
558
- sub_section = cls.build_flow_node(
559
- job_config, namespace, target_edges,
751
+ sub_section = self.build_flow_node(
752
+ namespace, job_def, target_edges,
560
753
  node_name, node, explicit_deps)
561
754
 
562
- graph_section = cls._join_sections(graph_section, sub_section, allow_partial_inputs=True)
755
+ graph_section = self._join_sections(graph_section, sub_section, allow_partial_inputs=True)
563
756
 
564
757
  if node.nodeType != meta.FlowNodeType.OUTPUT_NODE:
565
758
 
@@ -579,20 +772,18 @@ class GraphBuilder:
579
772
  missing_targets = [edge.target for node in remaining_edges_by_target.values() for edge in node]
580
773
  missing_target_names = [f"{t.node}.{t.socket}" if t.socket else t.node for t in missing_targets]
581
774
  missing_nodes = list(map(lambda n: NodeId(n, namespace), missing_target_names))
582
- cls._invalid_graph_error(missing_nodes)
775
+ self._invalid_graph_error(missing_nodes)
583
776
 
584
777
  return graph_section
585
778
 
586
- @classmethod
587
779
  def build_flow_node(
588
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
780
+ self, namespace: NodeNamespace,
781
+ job_def: meta.JobDefinition,
589
782
  target_edges: tp.Dict[meta.FlowSocket, meta.FlowEdge],
590
783
  node_name: str, node: meta.FlowNode,
591
784
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
592
785
  -> GraphSection:
593
786
 
594
- flow_job = job_config.job.runFlow
595
-
596
787
  def socket_key(socket):
597
788
  return f"{socket.node}.{socket.socket}" if socket.socket else socket.node
598
789
 
@@ -605,7 +796,7 @@ class GraphBuilder:
605
796
  edge = target_edges.get(socket)
606
797
  # Report missing edges as a job consistency error (this might happen sometimes in dev mode)
607
798
  if edge is None:
608
- raise _ex.EJobValidation(f"Inconsistent flow: Socket [{socket}] is not connected")
799
+ self._error(_ex.EJobValidation(f"Inconsistent flow: Socket [{socket}] is not connected"))
609
800
  return socket_id(edge.source.node, edge.source.socket, result_type)
610
801
 
611
802
  if node.nodeType == meta.FlowNodeType.PARAMETER_NODE:
@@ -628,27 +819,28 @@ class GraphBuilder:
628
819
  push_mapping = {**input_mapping, **param_mapping}
629
820
  pop_mapping = output_mapping
630
821
 
631
- model_selector = flow_job.models.get(node_name)
632
- model_obj = _util.get_job_resource(model_selector, job_config)
822
+ model_selector = job_def.runFlow.models.get(node_name)
823
+ model_obj = _util.get_job_resource(model_selector, self._job_config)
633
824
 
634
825
  # Missing models in the job config is a job consistency error
635
826
  if model_obj is None or model_obj.objectType != meta.ObjectType.MODEL:
636
- raise _ex.EJobValidation(f"No model was provided for flow node [{node_name}]")
827
+ self._error(_ex.EJobValidation(f"No model was provided for flow node [{node_name}]"))
637
828
 
638
829
  # Explicit check for model compatibility - report an error now, do not try build_model()
639
- cls.check_model_compatibility(model_selector, model_obj.model, node_name, node)
830
+ self.check_model_compatibility(model_selector, model_obj.model, node_name, node)
831
+ self.check_model_type(job_def, model_obj.model)
640
832
 
641
- return cls.build_model_or_flow_with_context(
642
- job_config, namespace, node_name, model_obj,
643
- push_mapping, pop_mapping, explicit_deps)
833
+ return self.build_model_or_flow_with_context(
834
+ namespace, node_name,
835
+ job_def, model_obj,
836
+ push_mapping, pop_mapping,
837
+ explicit_deps)
644
838
 
645
- # Missing / invalid node type - should be caught in static validation
646
- raise _ex.ETracInternal(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]")
839
+ self._error(_ex.EJobValidation(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]"))
647
840
 
648
- @classmethod
649
841
  def check_model_compatibility(
650
- cls, model_selector: meta.TagSelector, model_def: meta.ModelDefinition,
651
- node_name: str, flow_node: meta.FlowNode):
842
+ self, model_selector: meta.TagSelector,
843
+ model_def: meta.ModelDefinition, node_name: str, flow_node: meta.FlowNode):
652
844
 
653
845
  model_params = list(sorted(model_def.parameters.keys()))
654
846
  model_inputs = list(sorted(model_def.inputs.keys()))
@@ -660,7 +852,21 @@ class GraphBuilder:
660
852
 
661
853
  if model_params != node_params or model_inputs != node_inputs or model_outputs != node_outputs:
662
854
  model_key = _util.object_key(model_selector)
663
- raise _ex.EJobValidation(f"Incompatible model for flow node [{node_name}] (Model: [{model_key}])")
855
+ self._error(_ex.EJobValidation(f"Incompatible model for flow node [{node_name}] (Model: [{model_key}])"))
856
+
857
+ def check_model_type(self, job_def: meta.JobDefinition, model_def: meta.ModelDefinition):
858
+
859
+ if job_def.jobType == meta.JobType.IMPORT_DATA:
860
+ allowed_model_types = [meta.ModelType.DATA_IMPORT_MODEL]
861
+ elif job_def.jobType == meta.JobType.EXPORT_DATA:
862
+ allowed_model_types = [meta.ModelType.DATA_EXPORT_MODEL]
863
+ else:
864
+ allowed_model_types = [meta.ModelType.STANDARD_MODEL]
865
+
866
+ if model_def.modelType not in allowed_model_types:
867
+ job_type = job_def.jobType.name
868
+ model_type = model_def.modelType.name
869
+ self._error(_ex.EJobValidation(f"Job type [{job_type}] cannot use model type [{model_type}]"))
664
870
 
665
871
  @staticmethod
666
872
  def build_context_push(
@@ -722,8 +928,7 @@ class GraphBuilder:
722
928
  outputs={*pop_mapping.values()},
723
929
  must_run=[pop_id])
724
930
 
725
- @classmethod
726
- def _join_sections(cls, *sections: GraphSection, allow_partial_inputs: bool = False):
931
+ def _join_sections(self, *sections: GraphSection, allow_partial_inputs: bool = False):
727
932
 
728
933
  n_sections = len(sections)
729
934
  first_section = sections[0]
@@ -745,7 +950,7 @@ class GraphBuilder:
745
950
  if allow_partial_inputs:
746
951
  inputs.update(requirements_not_met)
747
952
  else:
748
- cls._invalid_graph_error(requirements_not_met)
953
+ self._invalid_graph_error(requirements_not_met)
749
954
 
750
955
  nodes.update(current_section.nodes)
751
956
 
@@ -754,13 +959,12 @@ class GraphBuilder:
754
959
 
755
960
  return GraphSection(nodes, inputs, last_section.outputs, must_run)
756
961
 
757
- @classmethod
758
- def _invalid_graph_error(cls, missing_dependencies: tp.Iterable[NodeId]):
962
+ def _invalid_graph_error(self, missing_dependencies: tp.Iterable[NodeId]):
759
963
 
760
- missing_ids = ", ".join(map(cls._missing_item_display_name, missing_dependencies))
761
- message = f"Invalid job config: The execution graph has unsatisfied dependencies: [{missing_ids}]"
964
+ missing_ids = ", ".join(map(self._missing_item_display_name, missing_dependencies))
965
+ message = f"The execution graph has unsatisfied dependencies: [{missing_ids}]"
762
966
 
763
- raise _ex.EJobValidation(message)
967
+ self._error(_ex.EJobValidation(message))
764
968
 
765
969
  @classmethod
766
970
  def _missing_item_display_name(cls, node_id: NodeId):
@@ -775,3 +979,7 @@ class GraphBuilder:
775
979
  return node_id.name
776
980
  else:
777
981
  return f"{node_id.name} / {', '.join(components[:-1])}"
982
+
983
+ def _error(self, error: Exception):
984
+
985
+ self._errors.append(error)