tracdap-runtime 0.6.5__py3-none-any.whl → 0.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_exec/context.py +272 -105
- tracdap/rt/_exec/dev_mode.py +231 -138
- tracdap/rt/_exec/engine.py +217 -59
- tracdap/rt/_exec/functions.py +25 -1
- tracdap/rt/_exec/graph.py +9 -0
- tracdap/rt/_exec/graph_builder.py +295 -198
- tracdap/rt/_exec/runtime.py +7 -5
- tracdap/rt/_impl/config_parser.py +11 -4
- tracdap/rt/_impl/data.py +278 -167
- tracdap/rt/_impl/ext/__init__.py +13 -0
- tracdap/rt/_impl/ext/sql.py +116 -0
- tracdap/rt/_impl/ext/storage.py +57 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +62 -54
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +37 -2
- tracdap/rt/_impl/static_api.py +24 -11
- tracdap/rt/_impl/storage.py +2 -2
- tracdap/rt/_impl/util.py +10 -0
- tracdap/rt/_impl/validation.py +66 -13
- tracdap/rt/_plugins/storage_sql.py +417 -0
- tracdap/rt/_plugins/storage_sql_dialects.py +117 -0
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/experimental.py +79 -32
- tracdap/rt/api/hook.py +10 -0
- tracdap/rt/metadata/__init__.py +4 -0
- tracdap/rt/metadata/job.py +45 -0
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/METADATA +3 -1
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/RECORD +30 -25
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/top_level.txt +0 -0
@@ -22,75 +22,111 @@ from .graph import *
|
|
22
22
|
|
23
23
|
class GraphBuilder:
|
24
24
|
|
25
|
-
|
26
|
-
|
27
|
-
|
25
|
+
__JOB_DETAILS = tp.TypeVar(
|
26
|
+
"__JOB_DETAILS",
|
27
|
+
meta.RunModelJob,
|
28
|
+
meta.RunFlowJob,
|
29
|
+
meta.ImportModelJob,
|
30
|
+
meta.ImportDataJob,
|
31
|
+
meta.ExportDataJob)
|
28
32
|
|
29
|
-
|
30
|
-
def build_job(
|
31
|
-
cls, job_config: config.JobConfig,
|
32
|
-
result_spec: JobResultSpec) -> Graph:
|
33
|
+
__JOB_BUILD_FUNC = tp.Callable[[meta.JobDefinition, NodeId], GraphSection]
|
33
34
|
|
34
|
-
|
35
|
-
return cls.build_standard_job(job_config, result_spec, cls.build_import_model_job)
|
35
|
+
def __init__(self, job_config: config.JobConfig, result_spec: JobResultSpec):
|
36
36
|
|
37
|
-
|
38
|
-
|
37
|
+
self._job_config = job_config
|
38
|
+
self._result_spec = result_spec
|
39
39
|
|
40
|
-
|
41
|
-
|
40
|
+
self._job_key = _util.object_key(job_config.jobId)
|
41
|
+
self._job_namespace = NodeNamespace(self._job_key)
|
42
42
|
|
43
|
-
|
44
|
-
return cls.build_standard_job(job_config, result_spec, cls.build_import_export_data_job)
|
43
|
+
self._errors = []
|
45
44
|
|
46
|
-
|
45
|
+
def _child_builder(self, job_id: meta.TagHeader) -> "GraphBuilder":
|
47
46
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
build_func: __JOB_BUILD_FUNC):
|
47
|
+
builder = GraphBuilder(self._job_config, JobResultSpec(save_result=False))
|
48
|
+
builder._job_key = _util.object_key(job_id)
|
49
|
+
builder._job_namespace = NodeNamespace(builder._job_key)
|
52
50
|
|
53
|
-
|
51
|
+
return builder
|
52
|
+
|
53
|
+
def build_job(self, job_def: meta.JobDefinition,) -> Graph:
|
54
|
+
|
55
|
+
try:
|
56
|
+
|
57
|
+
if job_def.jobType == meta.JobType.IMPORT_MODEL:
|
58
|
+
return self.build_standard_job(job_def, self.build_import_model_job)
|
59
|
+
|
60
|
+
if job_def.jobType == meta.JobType.RUN_MODEL:
|
61
|
+
return self.build_standard_job(job_def, self.build_run_model_job)
|
62
|
+
|
63
|
+
if job_def.jobType == meta.JobType.RUN_FLOW:
|
64
|
+
return self.build_standard_job(job_def, self.build_run_flow_job)
|
65
|
+
|
66
|
+
if job_def.jobType in [meta.JobType.IMPORT_DATA, meta.JobType.EXPORT_DATA]:
|
67
|
+
return self.build_standard_job(job_def, self.build_import_export_data_job)
|
68
|
+
|
69
|
+
if job_def.jobType == meta.JobType.JOB_GROUP:
|
70
|
+
return self.build_standard_job(job_def, self.build_job_group)
|
71
|
+
|
72
|
+
self._error(_ex.EJobValidation(f"Job type [{job_def.jobType.name}] is not supported yet"))
|
54
73
|
|
55
|
-
|
56
|
-
job_namespace = NodeNamespace(job_key)
|
74
|
+
except Exception as e:
|
57
75
|
|
58
|
-
|
59
|
-
|
76
|
+
# If there are recorded errors, assume unhandled exceptions are a result of those
|
77
|
+
# Only report the recorded errors, to reduce noise
|
78
|
+
if any(self._errors):
|
79
|
+
pass
|
80
|
+
|
81
|
+
# If no errors are recorded, an exception here would be a bug
|
82
|
+
raise _ex.ETracInternal(f"Unexpected error preparing the job execution graph") from e
|
83
|
+
|
84
|
+
finally:
|
85
|
+
|
86
|
+
if any(self._errors):
|
87
|
+
|
88
|
+
if len(self._errors) == 1:
|
89
|
+
raise self._errors[0]
|
90
|
+
else:
|
91
|
+
err_text = "\n".join(map(str, self._errors))
|
92
|
+
raise _ex.EJobValidation("Invalid job configuration\n" + err_text)
|
93
|
+
|
94
|
+
def build_standard_job(self, job_def: meta.JobDefinition, build_func: __JOB_BUILD_FUNC):
|
95
|
+
|
96
|
+
# Set up the job context
|
97
|
+
|
98
|
+
push_id = NodeId("trac_job_push", self._job_namespace, Bundle[tp.Any])
|
99
|
+
push_node = ContextPushNode(push_id, self._job_namespace)
|
60
100
|
push_section = GraphSection({push_id: push_node}, must_run=[push_id])
|
61
101
|
|
62
102
|
# Build the execution graphs for the main job and results recording
|
63
103
|
|
64
|
-
main_section = build_func(
|
65
|
-
main_result_id = NodeId.of("
|
104
|
+
main_section = build_func(job_def, push_id)
|
105
|
+
main_result_id = NodeId.of("trac_job_result", self._job_namespace, config.JobResult)
|
66
106
|
|
67
107
|
# Clean up the job context
|
68
108
|
|
69
|
-
global_result_id = NodeId.of(
|
109
|
+
global_result_id = NodeId.of(self._job_key, NodeNamespace.root(), config.JobResult)
|
70
110
|
|
71
|
-
pop_id = NodeId("trac_job_pop",
|
111
|
+
pop_id = NodeId("trac_job_pop", self._job_namespace, Bundle[tp.Any])
|
72
112
|
pop_mapping = {main_result_id: global_result_id}
|
73
113
|
|
74
114
|
pop_node = ContextPopNode(
|
75
|
-
pop_id,
|
115
|
+
pop_id, self._job_namespace, pop_mapping,
|
76
116
|
explicit_deps=main_section.must_run,
|
77
117
|
bundle=NodeNamespace.root())
|
78
118
|
|
79
|
-
global_result_node = BundleItemNode(global_result_id, pop_id,
|
119
|
+
global_result_node = BundleItemNode(global_result_id, pop_id, self._job_key)
|
80
120
|
|
81
121
|
pop_section = GraphSection({
|
82
122
|
pop_id: pop_node,
|
83
123
|
global_result_id: global_result_node})
|
84
124
|
|
85
|
-
job =
|
125
|
+
job = self._join_sections(push_section, main_section, pop_section)
|
86
126
|
|
87
127
|
return Graph(job.nodes, global_result_id)
|
88
128
|
|
89
|
-
|
90
|
-
def build_import_model_job(
|
91
|
-
cls, job_config: config.JobConfig, result_spec: JobResultSpec,
|
92
|
-
job_namespace: NodeNamespace, job_push_id: NodeId) \
|
93
|
-
-> GraphSection:
|
129
|
+
def build_import_model_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
|
94
130
|
|
95
131
|
# Main section: run the model import
|
96
132
|
|
@@ -98,82 +134,142 @@ class GraphBuilder:
|
|
98
134
|
new_model_id = _util.new_object_id(meta.ObjectType.MODEL)
|
99
135
|
new_model_key = _util.object_key(new_model_id)
|
100
136
|
|
101
|
-
model_scope =
|
102
|
-
import_details =
|
137
|
+
model_scope = self._job_key
|
138
|
+
import_details = job_def.importModel
|
103
139
|
|
104
|
-
import_id = NodeId.of("trac_import_model",
|
140
|
+
import_id = NodeId.of("trac_import_model", self._job_namespace, meta.ObjectDefinition)
|
105
141
|
import_node = ImportModelNode(import_id, model_scope, import_details, explicit_deps=[job_push_id])
|
106
142
|
|
107
143
|
main_section = GraphSection(nodes={import_id: import_node})
|
108
144
|
|
109
145
|
# Build job-level metadata outputs
|
110
146
|
|
111
|
-
result_section =
|
112
|
-
job_config, job_namespace, result_spec,
|
147
|
+
result_section = self.build_job_results(
|
113
148
|
objects={new_model_key: import_id},
|
114
149
|
explicit_deps=[job_push_id, *main_section.must_run])
|
115
150
|
|
116
|
-
return
|
151
|
+
return self._join_sections(main_section, result_section)
|
117
152
|
|
118
|
-
|
119
|
-
def build_import_export_data_job(
|
120
|
-
cls, job_config: config.JobConfig, result_spec: JobResultSpec,
|
121
|
-
job_namespace: NodeNamespace, job_push_id: NodeId) \
|
122
|
-
-> GraphSection:
|
153
|
+
def build_import_export_data_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
|
123
154
|
|
124
155
|
# TODO: These are processed as regular calculation jobs for now
|
125
156
|
# That might be ok, but is worth reviewing
|
126
157
|
|
127
|
-
if
|
128
|
-
|
158
|
+
if job_def.jobType == meta.JobType.IMPORT_DATA:
|
159
|
+
job_details = job_def.importData
|
129
160
|
else:
|
130
|
-
|
161
|
+
job_details = job_def.exportData
|
131
162
|
|
132
|
-
target_selector =
|
133
|
-
target_obj = _util.get_job_resource(target_selector,
|
163
|
+
target_selector = job_details.model
|
164
|
+
target_obj = _util.get_job_resource(target_selector, self._job_config)
|
134
165
|
target_def = target_obj.model
|
135
166
|
|
136
|
-
return
|
137
|
-
|
138
|
-
target_selector, target_def,
|
167
|
+
return self.build_calculation_job(
|
168
|
+
job_def, job_push_id,
|
169
|
+
target_selector, target_def,
|
170
|
+
job_details)
|
139
171
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
job_namespace: NodeNamespace, job_push_id: NodeId) \
|
144
|
-
-> GraphSection:
|
172
|
+
def build_run_model_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
|
173
|
+
|
174
|
+
job_details = job_def.runModel
|
145
175
|
|
146
|
-
target_selector =
|
147
|
-
target_obj = _util.get_job_resource(target_selector,
|
176
|
+
target_selector = job_details.model
|
177
|
+
target_obj = _util.get_job_resource(target_selector, self._job_config)
|
148
178
|
target_def = target_obj.model
|
149
|
-
job_def = job_config.job.runModel
|
150
179
|
|
151
|
-
return
|
152
|
-
|
153
|
-
target_selector, target_def,
|
180
|
+
return self.build_calculation_job(
|
181
|
+
job_def, job_push_id,
|
182
|
+
target_selector, target_def,
|
183
|
+
job_details)
|
154
184
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
job_namespace: NodeNamespace, job_push_id: NodeId) \
|
159
|
-
-> GraphSection:
|
185
|
+
def build_run_flow_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
|
186
|
+
|
187
|
+
job_details = job_def.runFlow
|
160
188
|
|
161
|
-
target_selector =
|
162
|
-
target_obj = _util.get_job_resource(target_selector,
|
189
|
+
target_selector = job_details.flow
|
190
|
+
target_obj = _util.get_job_resource(target_selector, self._job_config)
|
163
191
|
target_def = target_obj.flow
|
164
|
-
job_def = job_config.job.runFlow
|
165
192
|
|
166
|
-
return
|
167
|
-
|
168
|
-
target_selector, target_def,
|
193
|
+
return self.build_calculation_job(
|
194
|
+
job_def, job_push_id,
|
195
|
+
target_selector, target_def,
|
196
|
+
job_details)
|
197
|
+
|
198
|
+
def build_job_group(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
|
199
|
+
|
200
|
+
job_group = job_def.jobGroup
|
201
|
+
|
202
|
+
if job_group.jobGroupType == meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
|
203
|
+
return self.build_sequential_job_group(job_group, job_push_id)
|
204
|
+
|
205
|
+
if job_group.jobGroupType == meta.JobGroupType.PARALLEL_JOB_GROUP:
|
206
|
+
return self.build_parallel_job_group(job_group, job_push_id)
|
207
|
+
|
208
|
+
else:
|
209
|
+
self._error(_ex.EJobValidation(f"Job group type [{job_group.jobGroupType.name}] is not supported yet"))
|
210
|
+
return GraphSection(dict(), inputs={job_push_id})
|
211
|
+
|
212
|
+
def build_sequential_job_group(self, job_group: meta.JobGroup, job_push_id: NodeId) -> GraphSection:
|
213
|
+
|
214
|
+
nodes = dict()
|
215
|
+
prior_id = job_push_id
|
216
|
+
|
217
|
+
for child_def in job_group.sequential.jobs:
|
218
|
+
|
219
|
+
child_node = self.build_child_job(child_def, explicit_deps=[prior_id])
|
220
|
+
nodes[child_node.id] = child_node
|
221
|
+
|
222
|
+
prior_id = child_node.id
|
223
|
+
|
224
|
+
# No real results from job groups yet (they cannot be executed from the platform)
|
225
|
+
job_result = cfg.JobResult()
|
226
|
+
result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
|
227
|
+
result_node = StaticValueNode(result_id, job_result, explicit_deps=[prior_id])
|
228
|
+
nodes[result_id] = result_node
|
229
|
+
|
230
|
+
return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})
|
231
|
+
|
232
|
+
def build_parallel_job_group(self, job_group: meta.JobGroup, job_push_id: NodeId) -> GraphSection:
|
233
|
+
|
234
|
+
nodes = dict()
|
235
|
+
parallel_ids = [job_push_id]
|
236
|
+
|
237
|
+
for child_def in job_group.parallel.jobs:
|
238
|
+
|
239
|
+
child_node = self.build_child_job(child_def, explicit_deps=[job_push_id])
|
240
|
+
nodes[child_node.id] = child_node
|
241
|
+
|
242
|
+
parallel_ids.append(child_node.id)
|
243
|
+
|
244
|
+
# No real results from job groups yet (they cannot be executed from the platform)
|
245
|
+
job_result = cfg.JobResult()
|
246
|
+
result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
|
247
|
+
result_node = StaticValueNode(result_id, job_result, explicit_deps=parallel_ids)
|
248
|
+
nodes[result_id] = result_node
|
249
|
+
|
250
|
+
return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})
|
251
|
+
|
252
|
+
def build_child_job(self, child_job_def: meta.JobDefinition, explicit_deps) -> Node[config.JobResult]:
|
253
|
+
|
254
|
+
child_job_id = _util.new_object_id(meta.ObjectType.JOB)
|
255
|
+
|
256
|
+
child_builder = self._child_builder(child_job_id)
|
257
|
+
child_graph = child_builder.build_job(child_job_def)
|
258
|
+
|
259
|
+
child_node_name = _util.object_key(child_job_id)
|
260
|
+
child_node_id = NodeId.of(child_node_name, self._job_namespace, cfg.JobResult)
|
261
|
+
|
262
|
+
child_node = ChildJobNode(
|
263
|
+
child_node_id, child_job_id, child_job_def,
|
264
|
+
child_graph, explicit_deps)
|
265
|
+
|
266
|
+
return child_node
|
169
267
|
|
170
|
-
@classmethod
|
171
268
|
def build_calculation_job(
|
172
|
-
|
173
|
-
job_namespace: NodeNamespace, job_push_id: NodeId,
|
269
|
+
self, job_def: meta.JobDefinition, job_push_id: NodeId,
|
174
270
|
target_selector: meta.TagSelector,
|
175
271
|
target_def: tp.Union[meta.ModelDefinition, meta.FlowDefinition],
|
176
|
-
|
272
|
+
job_details: __JOB_DETAILS) \
|
177
273
|
-> GraphSection:
|
178
274
|
|
179
275
|
# The main execution graph can run directly in the job context, no need to do a context push
|
@@ -185,29 +281,30 @@ class GraphBuilder:
|
|
185
281
|
required_inputs = target_def.inputs
|
186
282
|
required_outputs = target_def.outputs
|
187
283
|
|
188
|
-
provided_params =
|
189
|
-
provided_inputs =
|
190
|
-
provided_outputs =
|
284
|
+
provided_params = job_details.parameters
|
285
|
+
provided_inputs = job_details.inputs
|
286
|
+
provided_outputs = job_details.outputs
|
191
287
|
|
192
|
-
params_section =
|
193
|
-
|
288
|
+
params_section = self.build_job_parameters(
|
289
|
+
required_params, provided_params,
|
194
290
|
explicit_deps=[job_push_id])
|
195
291
|
|
196
|
-
input_section =
|
197
|
-
|
292
|
+
input_section = self.build_job_inputs(
|
293
|
+
required_inputs, provided_inputs,
|
198
294
|
explicit_deps=[job_push_id])
|
199
295
|
|
200
|
-
|
296
|
+
exec_namespace = self._job_namespace
|
297
|
+
exec_obj = _util.get_job_resource(target_selector, self._job_config)
|
201
298
|
|
202
|
-
exec_section =
|
203
|
-
|
299
|
+
exec_section = self.build_model_or_flow(
|
300
|
+
exec_namespace, job_def, exec_obj,
|
204
301
|
explicit_deps=[job_push_id])
|
205
302
|
|
206
|
-
output_section =
|
207
|
-
|
303
|
+
output_section = self.build_job_outputs(
|
304
|
+
required_outputs, provided_outputs,
|
208
305
|
explicit_deps=[job_push_id])
|
209
306
|
|
210
|
-
main_section =
|
307
|
+
main_section = self._join_sections(params_section, input_section, exec_section, output_section)
|
211
308
|
|
212
309
|
# Build job-level metadata outputs
|
213
310
|
|
@@ -215,16 +312,14 @@ class GraphBuilder:
|
|
215
312
|
nid for nid, n in main_section.nodes.items()
|
216
313
|
if isinstance(n, DataResultNode))
|
217
314
|
|
218
|
-
result_section =
|
219
|
-
|
220
|
-
result_spec, bundles=data_result_ids,
|
315
|
+
result_section = self.build_job_results(
|
316
|
+
bundles=data_result_ids,
|
221
317
|
explicit_deps=[job_push_id, *main_section.must_run])
|
222
318
|
|
223
|
-
return
|
319
|
+
return self._join_sections(main_section, result_section)
|
224
320
|
|
225
|
-
@classmethod
|
226
321
|
def build_job_parameters(
|
227
|
-
|
322
|
+
self,
|
228
323
|
required_params: tp.Dict[str, meta.ModelParameter],
|
229
324
|
supplied_params: tp.Dict[str, meta.Value],
|
230
325
|
explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
|
@@ -240,18 +335,18 @@ class GraphBuilder:
|
|
240
335
|
if param_schema.defaultValue is not None:
|
241
336
|
param_def = param_schema.defaultValue
|
242
337
|
else:
|
243
|
-
|
338
|
+
self._error(_ex.EJobValidation(f"Missing required parameter: [{param_name}]"))
|
339
|
+
continue
|
244
340
|
|
245
|
-
param_id = NodeId(param_name,
|
341
|
+
param_id = NodeId(param_name, self._job_namespace, meta.Value)
|
246
342
|
param_node = StaticValueNode(param_id, param_def, explicit_deps=explicit_deps)
|
247
343
|
|
248
344
|
nodes[param_id] = param_node
|
249
345
|
|
250
346
|
return GraphSection(nodes, outputs=set(nodes.keys()), must_run=list(nodes.keys()))
|
251
347
|
|
252
|
-
@classmethod
|
253
348
|
def build_job_inputs(
|
254
|
-
|
349
|
+
self,
|
255
350
|
required_inputs: tp.Dict[str, meta.ModelInputSchema],
|
256
351
|
supplied_inputs: tp.Dict[str, meta.TagSelector],
|
257
352
|
explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
|
@@ -267,20 +362,21 @@ class GraphBuilder:
|
|
267
362
|
|
268
363
|
if data_selector is None:
|
269
364
|
if input_schema.optional:
|
270
|
-
data_view_id = NodeId.of(input_name,
|
365
|
+
data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
|
271
366
|
nodes[data_view_id] = StaticValueNode(data_view_id, _data.DataView.create_empty())
|
272
367
|
outputs.add(data_view_id)
|
273
368
|
continue
|
274
369
|
else:
|
275
|
-
|
370
|
+
self._error(_ex.EJobValidation(f"Missing required input: [{input_name}]"))
|
371
|
+
continue
|
276
372
|
|
277
373
|
# Build a data spec using metadata from the job config
|
278
374
|
# For now we are always loading the root part, snap 0, delta 0
|
279
|
-
data_def = _util.get_job_resource(data_selector,
|
280
|
-
storage_def = _util.get_job_resource(data_def.storageId,
|
375
|
+
data_def = _util.get_job_resource(data_selector, self._job_config).data
|
376
|
+
storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
|
281
377
|
|
282
378
|
if data_def.schemaId:
|
283
|
-
schema_def = _util.get_job_resource(data_def.schemaId,
|
379
|
+
schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
|
284
380
|
else:
|
285
381
|
schema_def = data_def.schema
|
286
382
|
|
@@ -289,16 +385,16 @@ class GraphBuilder:
|
|
289
385
|
data_spec = _data.DataSpec(data_item, data_def, storage_def, schema_def)
|
290
386
|
|
291
387
|
# Data spec node is static, using the assembled data spec
|
292
|
-
data_spec_id = NodeId.of(f"{input_name}:SPEC",
|
388
|
+
data_spec_id = NodeId.of(f"{input_name}:SPEC", self._job_namespace, _data.DataSpec)
|
293
389
|
data_spec_node = StaticValueNode(data_spec_id, data_spec, explicit_deps=explicit_deps)
|
294
390
|
|
295
391
|
# Physical load of data items from disk
|
296
392
|
# Currently one item per input, since inputs are single part/delta
|
297
|
-
data_load_id = NodeId.of(f"{input_name}:LOAD",
|
393
|
+
data_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
|
298
394
|
data_load_node = LoadDataNode(data_load_id, data_spec_id, explicit_deps=explicit_deps)
|
299
395
|
|
300
396
|
# Input views assembled by mapping one root part to each view
|
301
|
-
data_view_id = NodeId.of(input_name,
|
397
|
+
data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
|
302
398
|
data_view_node = DataViewNode(data_view_id, schema_def, data_load_id)
|
303
399
|
|
304
400
|
nodes[data_spec_id] = data_spec_node
|
@@ -311,9 +407,8 @@ class GraphBuilder:
|
|
311
407
|
|
312
408
|
return GraphSection(nodes, outputs=outputs, must_run=must_run)
|
313
409
|
|
314
|
-
@classmethod
|
315
410
|
def build_job_outputs(
|
316
|
-
|
411
|
+
self,
|
317
412
|
required_outputs: tp.Dict[str, meta.ModelOutputSchema],
|
318
413
|
supplied_outputs: tp.Dict[str, meta.TagSelector],
|
319
414
|
explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
|
@@ -329,25 +424,27 @@ class GraphBuilder:
|
|
329
424
|
if data_selector is None:
|
330
425
|
if output_schema.optional:
|
331
426
|
optional_info = "(configuration is required for all optional outputs, in case they are produced)"
|
332
|
-
|
427
|
+
self._error(_ex.EJobValidation(f"Missing optional output: [{output_name}] {optional_info}"))
|
428
|
+
continue
|
333
429
|
else:
|
334
|
-
|
430
|
+
self._error(_ex.EJobValidation(f"Missing required output: [{output_name}]"))
|
431
|
+
continue
|
335
432
|
|
336
433
|
# Output data view must already exist in the namespace
|
337
|
-
data_view_id = NodeId.of(output_name,
|
338
|
-
data_spec_id = NodeId.of(f"{output_name}:SPEC",
|
434
|
+
data_view_id = NodeId.of(output_name, self._job_namespace, _data.DataView)
|
435
|
+
data_spec_id = NodeId.of(f"{output_name}:SPEC", self._job_namespace, _data.DataSpec)
|
339
436
|
|
340
|
-
data_obj = _util.get_job_resource(data_selector,
|
437
|
+
data_obj = _util.get_job_resource(data_selector, self._job_config, optional=True)
|
341
438
|
|
342
439
|
if data_obj is not None:
|
343
440
|
|
344
441
|
# If data def for the output has been built in advance, use a static data spec
|
345
442
|
|
346
443
|
data_def = data_obj.data
|
347
|
-
storage_def = _util.get_job_resource(data_def.storageId,
|
444
|
+
storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
|
348
445
|
|
349
446
|
if data_def.schemaId:
|
350
|
-
schema_def = _util.get_job_resource(data_def.schemaId,
|
447
|
+
schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
|
351
448
|
else:
|
352
449
|
schema_def = data_def.schema
|
353
450
|
|
@@ -366,28 +463,28 @@ class GraphBuilder:
|
|
366
463
|
# Dynamic data def will always use an embedded schema (there is no ID for an external schema)
|
367
464
|
|
368
465
|
data_key = output_name + ":DATA"
|
369
|
-
data_id =
|
466
|
+
data_id = self._job_config.resultMapping[data_key]
|
370
467
|
storage_key = output_name + ":STORAGE"
|
371
|
-
storage_id =
|
468
|
+
storage_id = self._job_config.resultMapping[storage_key]
|
372
469
|
|
373
470
|
data_spec_node = DynamicDataSpecNode(
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
471
|
+
data_spec_id, data_view_id,
|
472
|
+
data_id, storage_id,
|
473
|
+
prior_data_spec=None,
|
474
|
+
explicit_deps=explicit_deps)
|
378
475
|
|
379
476
|
output_data_key = _util.object_key(data_id)
|
380
477
|
output_storage_key = _util.object_key(storage_id)
|
381
478
|
|
382
479
|
# Map one data item from each view, since outputs are single part/delta
|
383
|
-
data_item_id = NodeId(f"{output_name}:ITEM",
|
480
|
+
data_item_id = NodeId(f"{output_name}:ITEM", self._job_namespace, _data.DataItem)
|
384
481
|
data_item_node = DataItemNode(data_item_id, data_view_id)
|
385
482
|
|
386
483
|
# Create a physical save operation for the data item
|
387
|
-
data_save_id = NodeId.of(f"{output_name}:SAVE",
|
484
|
+
data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, None)
|
388
485
|
data_save_node = SaveDataNode(data_save_id, data_spec_id, data_item_id)
|
389
486
|
|
390
|
-
data_result_id = NodeId.of(f"{output_name}:RESULT",
|
487
|
+
data_result_id = NodeId.of(f"{output_name}:RESULT", self._job_namespace, ObjectBundle)
|
391
488
|
data_result_node = DataResultNode(
|
392
489
|
data_result_id, output_name,
|
393
490
|
data_item_id, data_spec_id, data_save_id,
|
@@ -406,6 +503,9 @@ class GraphBuilder:
|
|
406
503
|
@classmethod
|
407
504
|
def build_runtime_outputs(cls, output_names: tp.List[str], job_namespace: NodeNamespace):
|
408
505
|
|
506
|
+
# This method is called dynamically during job execution
|
507
|
+
# So it cannot use stateful information like self._job_config or self._job_namespace
|
508
|
+
|
409
509
|
# TODO: Factor out common logic with regular job outputs (including static / dynamic)
|
410
510
|
|
411
511
|
nodes = {}
|
@@ -462,22 +562,21 @@ class GraphBuilder:
|
|
462
562
|
|
463
563
|
return GraphSection(nodes, inputs=inputs, outputs={runtime_outputs_id})
|
464
564
|
|
465
|
-
@classmethod
|
466
565
|
def build_job_results(
|
467
|
-
|
566
|
+
self,
|
468
567
|
objects: tp.Dict[str, NodeId[meta.ObjectDefinition]] = None,
|
469
568
|
bundles: tp.List[NodeId[ObjectBundle]] = None,
|
470
569
|
explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
|
471
570
|
-> GraphSection:
|
472
571
|
|
473
|
-
build_result_id = NodeId.of("
|
572
|
+
build_result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
|
474
573
|
|
475
574
|
if objects is not None:
|
476
575
|
|
477
576
|
results_inputs = set(objects.values())
|
478
577
|
|
479
578
|
build_result_node = BuildJobResultNode(
|
480
|
-
build_result_id,
|
579
|
+
build_result_id, self._job_config.jobId,
|
481
580
|
outputs = JobOutputs(objects=objects),
|
482
581
|
explicit_deps=explicit_deps)
|
483
582
|
|
@@ -486,17 +585,16 @@ class GraphBuilder:
|
|
486
585
|
results_inputs = set(bundles)
|
487
586
|
|
488
587
|
build_result_node = BuildJobResultNode(
|
489
|
-
build_result_id,
|
588
|
+
build_result_id, self._job_config.jobId,
|
490
589
|
outputs = JobOutputs(bundles=bundles),
|
491
590
|
explicit_deps=explicit_deps)
|
492
591
|
|
493
592
|
else:
|
494
593
|
raise _ex.EUnexpected()
|
495
594
|
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
if result_spec.save_result:
|
595
|
+
if self._result_spec.save_result:
|
596
|
+
save_result_id = NodeId("trac_save_result", self._job_namespace)
|
597
|
+
save_result_node = SaveJobResultNode(save_result_id, build_result_id, self._result_spec)
|
500
598
|
result_nodes = {build_result_id: build_result_node, save_result_id: save_result_node}
|
501
599
|
job_result_id = save_result_id
|
502
600
|
else:
|
@@ -505,10 +603,9 @@ class GraphBuilder:
|
|
505
603
|
|
506
604
|
return GraphSection(result_nodes, inputs=results_inputs, must_run=[job_result_id])
|
507
605
|
|
508
|
-
@classmethod
|
509
606
|
def build_model_or_flow_with_context(
|
510
|
-
|
511
|
-
|
607
|
+
self, namespace: NodeNamespace, model_or_flow_name: str,
|
608
|
+
job_def: meta.JobDefinition, model_or_flow: meta.ObjectDefinition,
|
512
609
|
input_mapping: tp.Dict[str, NodeId], output_mapping: tp.Dict[str, NodeId],
|
513
610
|
explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
|
514
611
|
-> GraphSection:
|
@@ -521,44 +618,45 @@ class GraphBuilder:
|
|
521
618
|
# Execute in the sub-context by doing PUSH, EXEC, POP
|
522
619
|
# Note that POP node must be in the sub namespace too
|
523
620
|
|
524
|
-
push_section =
|
621
|
+
push_section = self.build_context_push(
|
525
622
|
sub_namespace, input_mapping,
|
526
623
|
explicit_deps)
|
527
624
|
|
528
|
-
exec_section =
|
529
|
-
|
625
|
+
exec_section = self.build_model_or_flow(
|
626
|
+
sub_namespace, job_def, model_or_flow,
|
530
627
|
explicit_deps=push_section.must_run)
|
531
628
|
|
532
|
-
pop_section =
|
629
|
+
pop_section = self.build_context_pop(
|
533
630
|
sub_namespace, output_mapping,
|
534
631
|
explicit_deps=exec_section.must_run)
|
535
632
|
|
536
|
-
return
|
633
|
+
return self._join_sections(push_section, exec_section, pop_section)
|
537
634
|
|
538
|
-
@classmethod
|
539
635
|
def build_model_or_flow(
|
540
|
-
|
636
|
+
self, namespace: NodeNamespace,
|
637
|
+
job_def: meta.JobDefinition,
|
541
638
|
model_or_flow: meta.ObjectDefinition,
|
542
639
|
explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
|
543
640
|
-> GraphSection:
|
544
641
|
|
545
642
|
if model_or_flow.objectType == meta.ObjectType.MODEL:
|
546
|
-
return
|
643
|
+
return self.build_model(namespace, job_def, model_or_flow.model, explicit_deps)
|
547
644
|
|
548
645
|
elif model_or_flow.objectType == meta.ObjectType.FLOW:
|
549
|
-
return
|
646
|
+
return self.build_flow(namespace, job_def, model_or_flow.flow)
|
550
647
|
|
551
648
|
else:
|
552
|
-
|
649
|
+
message = f"Invalid job config, expected model or flow, got [{model_or_flow.objectType}]"
|
650
|
+
self._error(_ex.EJobValidation(message))
|
553
651
|
|
554
|
-
@classmethod
|
555
652
|
def build_model(
|
556
|
-
|
653
|
+
self, namespace: NodeNamespace,
|
654
|
+
job_def: meta.JobDefinition,
|
557
655
|
model_def: meta.ModelDefinition,
|
558
656
|
explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
|
559
657
|
-> GraphSection:
|
560
658
|
|
561
|
-
|
659
|
+
self.check_model_type(job_def, model_def)
|
562
660
|
|
563
661
|
def param_id(node_name):
|
564
662
|
return NodeId(node_name, namespace, meta.Value)
|
@@ -572,10 +670,10 @@ class GraphBuilder:
|
|
572
670
|
output_ids = set(map(data_id, model_def.outputs))
|
573
671
|
|
574
672
|
# Set up storage access for import / export data jobs
|
575
|
-
if
|
576
|
-
storage_access =
|
577
|
-
elif
|
578
|
-
storage_access =
|
673
|
+
if job_def.jobType == meta.JobType.IMPORT_DATA:
|
674
|
+
storage_access = job_def.importData.storageAccess
|
675
|
+
elif job_def.jobType == meta.JobType.EXPORT_DATA:
|
676
|
+
storage_access = job_def.exportData.storageAccess
|
579
677
|
else:
|
580
678
|
storage_access = None
|
581
679
|
|
@@ -615,9 +713,9 @@ class GraphBuilder:
|
|
615
713
|
# Assemble a graph to include the model and its outputs
|
616
714
|
return GraphSection(nodes, inputs={*parameter_ids, *input_ids}, outputs=output_ids, must_run=[model_result_id])
|
617
715
|
|
618
|
-
@classmethod
|
619
716
|
def build_flow(
|
620
|
-
|
717
|
+
self, namespace: NodeNamespace,
|
718
|
+
job_def: meta.JobDefinition,
|
621
719
|
flow_def: meta.FlowDefinition,
|
622
720
|
explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
|
623
721
|
-> GraphSection:
|
@@ -650,11 +748,11 @@ class GraphBuilder:
|
|
650
748
|
|
651
749
|
node_name, node = reachable_nodes.popitem()
|
652
750
|
|
653
|
-
sub_section =
|
654
|
-
|
751
|
+
sub_section = self.build_flow_node(
|
752
|
+
namespace, job_def, target_edges,
|
655
753
|
node_name, node, explicit_deps)
|
656
754
|
|
657
|
-
graph_section =
|
755
|
+
graph_section = self._join_sections(graph_section, sub_section, allow_partial_inputs=True)
|
658
756
|
|
659
757
|
if node.nodeType != meta.FlowNodeType.OUTPUT_NODE:
|
660
758
|
|
@@ -674,20 +772,18 @@ class GraphBuilder:
|
|
674
772
|
missing_targets = [edge.target for node in remaining_edges_by_target.values() for edge in node]
|
675
773
|
missing_target_names = [f"{t.node}.{t.socket}" if t.socket else t.node for t in missing_targets]
|
676
774
|
missing_nodes = list(map(lambda n: NodeId(n, namespace), missing_target_names))
|
677
|
-
|
775
|
+
self._invalid_graph_error(missing_nodes)
|
678
776
|
|
679
777
|
return graph_section
|
680
778
|
|
681
|
-
@classmethod
|
682
779
|
def build_flow_node(
|
683
|
-
|
780
|
+
self, namespace: NodeNamespace,
|
781
|
+
job_def: meta.JobDefinition,
|
684
782
|
target_edges: tp.Dict[meta.FlowSocket, meta.FlowEdge],
|
685
783
|
node_name: str, node: meta.FlowNode,
|
686
784
|
explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
|
687
785
|
-> GraphSection:
|
688
786
|
|
689
|
-
flow_job = job_config.job.runFlow
|
690
|
-
|
691
787
|
def socket_key(socket):
|
692
788
|
return f"{socket.node}.{socket.socket}" if socket.socket else socket.node
|
693
789
|
|
@@ -700,7 +796,7 @@ class GraphBuilder:
|
|
700
796
|
edge = target_edges.get(socket)
|
701
797
|
# Report missing edges as a job consistency error (this might happen sometimes in dev mode)
|
702
798
|
if edge is None:
|
703
|
-
|
799
|
+
self._error(_ex.EJobValidation(f"Inconsistent flow: Socket [{socket}] is not connected"))
|
704
800
|
return socket_id(edge.source.node, edge.source.socket, result_type)
|
705
801
|
|
706
802
|
if node.nodeType == meta.FlowNodeType.PARAMETER_NODE:
|
@@ -723,27 +819,27 @@ class GraphBuilder:
|
|
723
819
|
push_mapping = {**input_mapping, **param_mapping}
|
724
820
|
pop_mapping = output_mapping
|
725
821
|
|
726
|
-
model_selector =
|
727
|
-
model_obj = _util.get_job_resource(model_selector,
|
822
|
+
model_selector = job_def.runFlow.models.get(node_name)
|
823
|
+
model_obj = _util.get_job_resource(model_selector, self._job_config)
|
728
824
|
|
729
825
|
# Missing models in the job config is a job consistency error
|
730
826
|
if model_obj is None or model_obj.objectType != meta.ObjectType.MODEL:
|
731
|
-
|
827
|
+
self._error(_ex.EJobValidation(f"No model was provided for flow node [{node_name}]"))
|
732
828
|
|
733
829
|
# Explicit check for model compatibility - report an error now, do not try build_model()
|
734
|
-
|
735
|
-
|
830
|
+
self.check_model_compatibility(model_selector, model_obj.model, node_name, node)
|
831
|
+
self.check_model_type(job_def, model_obj.model)
|
736
832
|
|
737
|
-
return
|
738
|
-
|
739
|
-
|
833
|
+
return self.build_model_or_flow_with_context(
|
834
|
+
namespace, node_name,
|
835
|
+
job_def, model_obj,
|
836
|
+
push_mapping, pop_mapping,
|
837
|
+
explicit_deps)
|
740
838
|
|
741
|
-
|
742
|
-
raise _ex.ETracInternal(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]")
|
839
|
+
self._error(_ex.EJobValidation(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]"))
|
743
840
|
|
744
|
-
@classmethod
|
745
841
|
def check_model_compatibility(
|
746
|
-
|
842
|
+
self, model_selector: meta.TagSelector,
|
747
843
|
model_def: meta.ModelDefinition, node_name: str, flow_node: meta.FlowNode):
|
748
844
|
|
749
845
|
model_params = list(sorted(model_def.parameters.keys()))
|
@@ -756,22 +852,21 @@ class GraphBuilder:
|
|
756
852
|
|
757
853
|
if model_params != node_params or model_inputs != node_inputs or model_outputs != node_outputs:
|
758
854
|
model_key = _util.object_key(model_selector)
|
759
|
-
|
855
|
+
self._error(_ex.EJobValidation(f"Incompatible model for flow node [{node_name}] (Model: [{model_key}])"))
|
760
856
|
|
761
|
-
|
762
|
-
def check_model_type(cls, job_config: config.JobConfig, model_def: meta.ModelDefinition):
|
857
|
+
def check_model_type(self, job_def: meta.JobDefinition, model_def: meta.ModelDefinition):
|
763
858
|
|
764
|
-
if
|
859
|
+
if job_def.jobType == meta.JobType.IMPORT_DATA:
|
765
860
|
allowed_model_types = [meta.ModelType.DATA_IMPORT_MODEL]
|
766
|
-
elif
|
861
|
+
elif job_def.jobType == meta.JobType.EXPORT_DATA:
|
767
862
|
allowed_model_types = [meta.ModelType.DATA_EXPORT_MODEL]
|
768
863
|
else:
|
769
864
|
allowed_model_types = [meta.ModelType.STANDARD_MODEL]
|
770
865
|
|
771
866
|
if model_def.modelType not in allowed_model_types:
|
772
|
-
job_type =
|
867
|
+
job_type = job_def.jobType.name
|
773
868
|
model_type = model_def.modelType.name
|
774
|
-
|
869
|
+
self._error(_ex.EJobValidation(f"Job type [{job_type}] cannot use model type [{model_type}]"))
|
775
870
|
|
776
871
|
@staticmethod
|
777
872
|
def build_context_push(
|
@@ -833,8 +928,7 @@ class GraphBuilder:
|
|
833
928
|
outputs={*pop_mapping.values()},
|
834
929
|
must_run=[pop_id])
|
835
930
|
|
836
|
-
|
837
|
-
def _join_sections(cls, *sections: GraphSection, allow_partial_inputs: bool = False):
|
931
|
+
def _join_sections(self, *sections: GraphSection, allow_partial_inputs: bool = False):
|
838
932
|
|
839
933
|
n_sections = len(sections)
|
840
934
|
first_section = sections[0]
|
@@ -856,7 +950,7 @@ class GraphBuilder:
|
|
856
950
|
if allow_partial_inputs:
|
857
951
|
inputs.update(requirements_not_met)
|
858
952
|
else:
|
859
|
-
|
953
|
+
self._invalid_graph_error(requirements_not_met)
|
860
954
|
|
861
955
|
nodes.update(current_section.nodes)
|
862
956
|
|
@@ -865,13 +959,12 @@ class GraphBuilder:
|
|
865
959
|
|
866
960
|
return GraphSection(nodes, inputs, last_section.outputs, must_run)
|
867
961
|
|
868
|
-
|
869
|
-
def _invalid_graph_error(cls, missing_dependencies: tp.Iterable[NodeId]):
|
962
|
+
def _invalid_graph_error(self, missing_dependencies: tp.Iterable[NodeId]):
|
870
963
|
|
871
|
-
missing_ids = ", ".join(map(
|
872
|
-
message = f"
|
964
|
+
missing_ids = ", ".join(map(self._missing_item_display_name, missing_dependencies))
|
965
|
+
message = f"The execution graph has unsatisfied dependencies: [{missing_ids}]"
|
873
966
|
|
874
|
-
|
967
|
+
self._error(_ex.EJobValidation(message))
|
875
968
|
|
876
969
|
@classmethod
|
877
970
|
def _missing_item_display_name(cls, node_id: NodeId):
|
@@ -886,3 +979,7 @@ class GraphBuilder:
|
|
886
979
|
return node_id.name
|
887
980
|
else:
|
888
981
|
return f"{node_id.name} / {', '.join(components[:-1])}"
|
982
|
+
|
983
|
+
def _error(self, error: Exception):
|
984
|
+
|
985
|
+
self._errors.append(error)
|