tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b2__py3-none-any.whl
This diff compares the contents of two publicly released package versions, as published to a supported public registry. It is provided for informational purposes only.
- tracdap/rt/_impl/core/config_parser.py +29 -3
- tracdap/rt/_impl/core/data.py +627 -40
- tracdap/rt/_impl/core/repos.py +17 -8
- tracdap/rt/_impl/core/storage.py +25 -13
- tracdap/rt/_impl/core/struct.py +254 -60
- tracdap/rt/_impl/core/util.py +125 -11
- tracdap/rt/_impl/exec/context.py +35 -8
- tracdap/rt/_impl/exec/dev_mode.py +169 -127
- tracdap/rt/_impl/exec/engine.py +203 -140
- tracdap/rt/_impl/exec/functions.py +228 -263
- tracdap/rt/_impl/exec/graph.py +141 -126
- tracdap/rt/_impl/exec/graph_builder.py +428 -449
- tracdap/rt/_impl/grpc/codec.py +8 -13
- tracdap/rt/_impl/grpc/server.py +7 -7
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +37 -35
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +23 -19
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
- tracdap/rt/_impl/runtime.py +3 -9
- tracdap/rt/_impl/static_api.py +5 -6
- tracdap/rt/_plugins/format_csv.py +2 -2
- tracdap/rt/_plugins/repo_git.py +56 -11
- tracdap/rt/_plugins/storage_aws.py +165 -150
- tracdap/rt/_plugins/storage_azure.py +17 -11
- tracdap/rt/_plugins/storage_gcp.py +35 -18
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/model_api.py +45 -0
- tracdap/rt/config/__init__.py +7 -9
- tracdap/rt/config/common.py +3 -14
- tracdap/rt/config/job.py +17 -3
- tracdap/rt/config/platform.py +9 -32
- tracdap/rt/config/result.py +8 -4
- tracdap/rt/config/runtime.py +5 -10
- tracdap/rt/config/tenant.py +28 -0
- tracdap/rt/launch/cli.py +0 -8
- tracdap/rt/launch/launch.py +1 -3
- tracdap/rt/metadata/__init__.py +35 -35
- tracdap/rt/metadata/data.py +19 -31
- tracdap/rt/metadata/job.py +3 -1
- tracdap/rt/metadata/storage.py +9 -0
- tracdap/rt/metadata/type.py +9 -5
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +5 -3
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +64 -63
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
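The headline change in this release is the rewritten job graph builder (diff below): output object IDs are no longer looked up through `job_config.resultMapping`; they are drawn from a pool of `preallocatedIds`, bucketed by object type in the new `GraphBuilder.__init__`. As a rough illustration of that bucketing pattern, here is a minimal, runnable sketch; the tuple-based stand-ins for TRAC tag headers are assumptions for the example, not the real `_meta.TagHeader` type. Note that `itertools.groupby` only merges *adjacent* elements, which is why the new code sorts by `objectType.value` first.

```python
import itertools
from enum import Enum

class ObjectType(Enum):
    DATA = 1
    STORAGE = 2

# Hypothetical stand-ins for TRAC tag headers: (objectType, objectId) pairs
preallocated_ids = [
    (ObjectType.STORAGE, "storage-001"),
    (ObjectType.DATA, "data-001"),
    (ObjectType.DATA, "data-002"),
]

# groupby only merges adjacent items, so sort by object type first,
# mirroring the sorted(...) / groupby(...) combination in GraphBuilder.__init__
buckets = dict(
    (object_type, list(ids)) for object_type, ids in itertools.groupby(
        sorted(preallocated_ids, key=lambda oid: oid[0].value),
        lambda oid: oid[0]))

print(buckets[ObjectType.DATA])     # both DATA headers land in one bucket
print(buckets[ObjectType.STORAGE])  # the single STORAGE header
```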
```diff
--- tracdap/rt/_impl/exec/graph_builder.py (0.8.0rc2)
+++ tracdap/rt/_impl/exec/graph_builder.py (0.9.0b2)
@@ -13,29 +13,41 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import …
+import itertools as _itr
+import typing as _tp

-import tracdap.rt.…
+import tracdap.rt.metadata as _meta
+import tracdap.rt.config as _cfg
 import tracdap.rt.exceptions as _ex
 import tracdap.rt._impl.core.data as _data
+import tracdap.rt._impl.core.type_system as _type_system
 import tracdap.rt._impl.core.util as _util
+import tracdap.rt.api as _api

 from .graph import *


 class GraphBuilder:

-    __JOB_DETAILS = …
+    __JOB_DETAILS = _tp.TypeVar(
         "__JOB_DETAILS",
-        …
-        …
-        …
-        …
-        …
+        _meta.RunModelJob,
+        _meta.RunFlowJob,
+        _meta.ImportModelJob,
+        _meta.ImportDataJob,
+        _meta.ExportDataJob)

-    __JOB_BUILD_FUNC = …
+    __JOB_BUILD_FUNC = _tp.Callable[[_meta.JobDefinition, NodeId], GraphSection]

-    …
+    @classmethod
+    def dynamic(cls, context: GraphContext) -> "GraphBuilder":
+
+        sys_config = context.sys_config
+        job_config = _cfg.JobConfig(context.job_id)
+
+        return GraphBuilder(sys_config, job_config)
+
+    def __init__(self, sys_config: _cfg.RuntimeConfig, job_config: _cfg.JobConfig):

         self._sys_config = sys_config
         self._job_config = job_config
@@ -43,80 +55,97 @@ class GraphBuilder:
         self._job_key = _util.object_key(job_config.jobId)
         self._job_namespace = NodeNamespace(self._job_key)

-        …
+        # Dictionary of object type to preallocated IDs
+        self._preallocated_ids = dict(
+            (k, list(v)) for k, v in _itr.groupby(
+                sorted(job_config.preallocatedIds, key=lambda oid: oid.objectType.value),
+                lambda oid: oid.objectType))
+
+        self._errors = list()
+
+    def unallocated_ids(self) -> _tp.Dict[_meta.ObjectType, _meta.TagHeader]:
+        return self._preallocated_ids

-    def _child_builder(self, job_id: …
+    def _child_builder(self, job_id: _meta.TagHeader) -> "GraphBuilder":

         builder = GraphBuilder(self._sys_config, self._job_config)
         builder._job_key = _util.object_key(job_id)
         builder._job_namespace = NodeNamespace(builder._job_key)

+        # Do not share preallocated IDs with the child graph
+        builder._preallocated_ids = dict()
+
         return builder

-    def build_job(self, job_def: …
+    def build_job(self, job_def: _meta.JobDefinition, ) -> Graph:

         try:

-            if job_def.jobType == …
-                …
+            if job_def.jobType == _meta.JobType.IMPORT_MODEL:
+                graph = self.build_standard_job(job_def, self.build_import_model_job)
+
+            elif job_def.jobType == _meta.JobType.RUN_MODEL:
+                graph = self.build_standard_job(job_def, self.build_run_model_job)

-            …
-            …
+            elif job_def.jobType == _meta.JobType.RUN_FLOW:
+                graph = self.build_standard_job(job_def, self.build_run_flow_job)

-            …
-            …
+            elif job_def.jobType in [_meta.JobType.IMPORT_DATA, _meta.JobType.EXPORT_DATA]:
+                graph = self.build_standard_job(job_def, self.build_import_export_data_job)

-            …
-            …
+            elif job_def.jobType == _meta.JobType.JOB_GROUP:
+                graph = self.build_standard_job(job_def, self.build_job_group)

-            …
-            …
+            else:
+                self._error(_ex.EJobValidation(f"Job type [{job_def.jobType.name}] is not supported yet"))
+                raise self._error_summary()

-            self.…
+            if any(self._errors):
+                raise self._error_summary()
+            else:
+                return graph

         except Exception as e:

             # If there are recorded, errors, assume unhandled exceptions are a result of those
             # Only report the recorded errors, to reduce noise
             if any(self._errors):
-                …
+                raise self._error_summary()

             # If no errors are recorded, an exception here would be a bug
             raise _ex.ETracInternal(f"Unexpected error preparing the job execution graph") from e

-    …
-    …
-    if any(self._errors):
+    def _error_summary(self) -> Exception:

-        …
-        …
-        …
-        …
-        …
+        if len(self._errors) == 1:
+            return self._errors[0]
+        else:
+            err_text = "\n".join(map(str, self._errors))
+            return _ex.EJobValidation("Invalid job configuration\n" + err_text)

-    def build_standard_job(self, job_def: …
+    def build_standard_job(self, job_def: _meta.JobDefinition, build_func: __JOB_BUILD_FUNC):

         # Set up the job context

-        push_id = NodeId("trac_job_push", self._job_namespace, Bundle[…
+        push_id = NodeId("trac_job_push", self._job_namespace, Bundle[_tp.Any])
         push_node = ContextPushNode(push_id, self._job_namespace)
         push_section = GraphSection({push_id: push_node}, must_run=[push_id])

         # Build the execution graphs for the main job and results recording

         main_section = build_func(job_def, push_id)
-        main_result_id = NodeId.of("trac_job_result", self._job_namespace, …
+        main_result_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)

         # Clean up the job context

-        global_result_id = NodeId.of(self._job_key, NodeNamespace.root(), …
+        global_result_id = NodeId.of(self._job_key, NodeNamespace.root(), _cfg.JobResult)

-        pop_id = NodeId("trac_job_pop", self._job_namespace, Bundle[…
+        pop_id = NodeId("trac_job_pop", self._job_namespace, Bundle[_tp.Any])
         pop_mapping = {main_result_id: global_result_id}

         pop_node = ContextPopNode(
             pop_id, self._job_namespace, pop_mapping,
-            explicit_deps=main_section.must_run,
+            explicit_deps=[push_id, *main_section.must_run],
             bundle=NodeNamespace.root())

         global_result_node = BundleItemNode(global_result_id, pop_id, self._job_key)
@@ -129,42 +158,41 @@ class GraphBuilder:

         return Graph(job.nodes, global_result_id)

-    def build_import_model_job(self, job_def: …
+    def build_import_model_job(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:

-        # …
+        # TRAC object ID for the new model
+        model_id = self._allocate_id(_meta.ObjectType.MODEL)

-        # TODO: Import model job should pre-allocate an ID, then model ID comes from job_config.resultMapping
-        new_model_id = _util.new_object_id(meta.ObjectType.MODEL)
-        new_model_key = _util.object_key(new_model_id)
-
-        model_scope = self._job_key
         import_details = job_def.importModel
+        import_scope = self._job_key

-        …
-        …
+        # Graph node ID for the import operation
+        import_id = NodeId.of("trac_import_model", self._job_namespace, GraphOutput)

-        …
+        import_node = ImportModelNode(
+            import_id, model_id,
+            import_details, import_scope,
+            explicit_deps=[job_push_id])

-        …
+        main_section = GraphSection(nodes={import_id: import_node})

-        …
-        …
-            explicit_deps=[job_push_id, *main_section.must_run])
+        # RESULT will have a single (unnamed) output
+        result_section = self.build_job_result([import_id], explicit_deps=[job_push_id, *main_section.must_run])

         return self._join_sections(main_section, result_section)

-    def build_import_export_data_job(self, job_def: …
+    def build_import_export_data_job(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:

         # TODO: These are processed as regular calculation jobs for now
         # That might be ok, but is worth reviewing

-        if job_def.jobType == …
+        if job_def.jobType == _meta.JobType.IMPORT_DATA:
             job_details = job_def.importData
         else:
             job_details = job_def.exportData

         target_selector = job_details.model
-        target_obj = _util.…
+        target_obj = _util.get_job_metadata(target_selector, self._job_config)
         target_def = target_obj.model

         return self.build_calculation_job(
@@ -172,12 +200,12 @@ class GraphBuilder:
             target_selector, target_def,
             job_details)

-    def build_run_model_job(self, job_def: …
+    def build_run_model_job(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:

         job_details = job_def.runModel

         target_selector = job_details.model
-        target_obj = _util.…
+        target_obj = _util.get_job_metadata(target_selector, self._job_config)
         target_def = target_obj.model

         return self.build_calculation_job(
@@ -185,12 +213,12 @@ class GraphBuilder:
             target_selector, target_def,
             job_details)

-    def build_run_flow_job(self, job_def: …
+    def build_run_flow_job(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:

         job_details = job_def.runFlow

         target_selector = job_details.flow
-        target_obj = _util.…
+        target_obj = _util.get_job_metadata(target_selector, self._job_config)
         target_def = target_obj.flow

         return self.build_calculation_job(
@@ -198,21 +226,21 @@ class GraphBuilder:
             target_selector, target_def,
             job_details)

-    def build_job_group(self, job_def: …
+    def build_job_group(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:

         job_group = job_def.jobGroup

-        if job_group.jobGroupType == …
+        if job_group.jobGroupType == _meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
             return self.build_sequential_job_group(job_group, job_push_id)

-        if job_group.jobGroupType == …
+        if job_group.jobGroupType == _meta.JobGroupType.PARALLEL_JOB_GROUP:
             return self.build_parallel_job_group(job_group, job_push_id)

         else:
             self._error(_ex.EJobValidation(f"Job group type [{job_group.jobGroupType.name}] is not supported yet"))
             return GraphSection(dict(), inputs={job_push_id})

-    def build_sequential_job_group(self, job_group: …
+    def build_sequential_job_group(self, job_group: _meta.JobGroup, job_push_id: NodeId) -> GraphSection:

         nodes = dict()
         prior_id = job_push_id
@@ -225,14 +253,14 @@ class GraphBuilder:
             prior_id = child_node.id

         # No real results from job groups yet (they cannot be executed from the platform)
-        job_result = …
-        result_id = NodeId.of("trac_job_result", self._job_namespace, …
+        job_result = _cfg.JobResult()
+        result_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
         result_node = StaticValueNode(result_id, job_result, explicit_deps=[prior_id])
         nodes[result_id] = result_node

         return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})

-    def build_parallel_job_group(self, job_group: …
+    def build_parallel_job_group(self, job_group: _meta.JobGroup, job_push_id: NodeId) -> GraphSection:

         nodes = dict()
         parallel_ids = [job_push_id]
@@ -245,22 +273,22 @@ class GraphBuilder:
             parallel_ids.append(child_node.id)

         # No real results from job groups yet (they cannot be executed from the platform)
-        job_result = …
-        result_id = NodeId.of("trac_job_result", self._job_namespace, …
+        job_result = _cfg.JobResult()
+        result_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
         result_node = StaticValueNode(result_id, job_result, explicit_deps=parallel_ids)
         nodes[result_id] = result_node

         return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})

-    def build_child_job(self, child_job_def: …
+    def build_child_job(self, child_job_def: _meta.JobDefinition, explicit_deps) -> Node[_cfg.JobResult]:

-        child_job_id = …
+        child_job_id = self._allocate_id(_meta.ObjectType.JOB)

         child_builder = self._child_builder(child_job_id)
         child_graph = child_builder.build_job(child_job_def)

         child_node_name = _util.object_key(child_job_id)
-        child_node_id = NodeId.of(child_node_name, self._job_namespace, …
+        child_node_id = NodeId.of(child_node_name, self._job_namespace, _cfg.JobResult)

         child_node = ChildJobNode(
             child_node_id, child_job_id, child_job_def,
@@ -269,9 +297,9 @@ class GraphBuilder:
         return child_node

     def build_calculation_job(
-            self, job_def: …
-            target_selector: …
-            target_def: …
+            self, job_def: _meta.JobDefinition, job_push_id: NodeId,
+            target_selector: _meta.TagSelector,
+            target_def: _tp.Union[_meta.ModelDefinition, _meta.FlowDefinition],
             job_details: __JOB_DETAILS) \
             -> GraphSection:

@@ -282,11 +310,11 @@ class GraphBuilder:

         required_params = target_def.parameters
         required_inputs = target_def.inputs
-        …
+        expected_outputs = target_def.outputs

         provided_params = job_details.parameters
         provided_inputs = job_details.inputs
-        …
+        prior_outputs = job_details.priorOutputs

         params_section = self.build_job_parameters(
             required_params, provided_params,
@@ -296,36 +324,48 @@ class GraphBuilder:
             required_inputs, provided_inputs,
             explicit_deps=[job_push_id])

+        prior_outputs_section = self.build_job_prior_outputs(
+            expected_outputs, prior_outputs,
+            explicit_deps=[job_push_id])
+
         exec_namespace = self._job_namespace
-        exec_obj = _util.…
+        exec_obj = _util.get_job_metadata(target_selector, self._job_config)

         exec_section = self.build_model_or_flow(
             exec_namespace, job_def, exec_obj,
             explicit_deps=[job_push_id])

         output_section = self.build_job_outputs(
-            …
+            expected_outputs, prior_outputs,
             explicit_deps=[job_push_id])

-        main_section = self._join_sections(…
+        main_section = self._join_sections(
+            params_section, input_section, prior_outputs_section,
+            exec_section, output_section)

         # Build job-level metadata outputs

-        …
+        output_ids = list(
             nid for nid, n in main_section.nodes.items()
-            if isinstance(n, …
+            if nid.result_type == GraphOutput or isinstance(n, SaveDataNode))
+
+        # Map the SAVE nodes to their corresponding named output keys
+        output_keys = dict(
+            (nid, nid.name.replace(":SAVE", ""))
+            for nid, n in output_section.nodes.items()
+            if isinstance(n, SaveDataNode))

-        result_section = self.…
-        …
+        result_section = self.build_job_result(
+            output_ids, output_keys,
             explicit_deps=[job_push_id, *main_section.must_run])

         return self._join_sections(main_section, result_section)

     def build_job_parameters(
             self,
-            required_params: …
-            supplied_params: …
-            explicit_deps: …
+            required_params: _tp.Dict[str, _meta.ModelParameter],
+            supplied_params: _tp.Dict[str, _meta.Value],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         nodes = dict()
@@ -341,7 +381,7 @@ class GraphBuilder:
                 self._error(_ex.EJobValidation(f"Missing required parameter: [{param_name}]"))
                 continue

-            param_id = NodeId(param_name, self._job_namespace, …
+            param_id = NodeId(param_name, self._job_namespace, _meta.Value)
             param_node = StaticValueNode(param_id, param_def, explicit_deps=explicit_deps)

             nodes[param_id] = param_node
@@ -350,402 +390,241 @@ class GraphBuilder:

     def build_job_inputs(
             self,
-            required_inputs: …
-            supplied_inputs: …
-            explicit_deps: …
+            required_inputs: _tp.Dict[str, _meta.ModelInputSchema],
+            supplied_inputs: _tp.Dict[str, _meta.TagSelector],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         nodes = dict()
         outputs = set()

-        for input_name, …
-
-            # Backwards compatibility with pre 0.8 versions
-            input_type = meta.ObjectType.DATA \
-                if input_def.objectType == meta.ObjectType.OBJECT_TYPE_NOT_SET \
-                else input_def.objectType
+        for input_name, input_schema in required_inputs.items():

             input_selector = supplied_inputs.get(input_name)

             if input_selector is None:

-                if …
+                if input_schema.optional:
                     data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
-                    data_view = _data.DataView.create_empty(…
+                    data_view = _data.DataView.create_empty(input_schema.objectType)
                     nodes[data_view_id] = StaticValueNode(data_view_id, data_view, explicit_deps=explicit_deps)
                     outputs.add(data_view_id)
                 else:
                     self._error(_ex.EJobValidation(f"Missing required input: [{input_name}]"))

-                …
-                self._build_data_input(input_name, input_selector, nodes, outputs, explicit_deps)
+                continue

-            …
+            if input_schema.objectType == _meta.ObjectType.DATA:
+                self._build_data_input(input_name, input_selector, nodes, outputs, explicit_deps)
+            elif input_schema.objectType == _meta.ObjectType.FILE:
                 self._build_file_input(input_name, input_selector, nodes, outputs, explicit_deps)
-            …
             else:
-                self._error(_ex.EJobValidation(f"Invalid input type [{…
+                self._error(_ex.EJobValidation(f"Invalid input type [{input_schema.objectType}] for input [{input_name}]"))

         return GraphSection(nodes, outputs=outputs)

-    def …
-        …
-        if data_def.schemaId:
-            schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
-        else:
-            schema_def = data_def.schema
+    def build_job_prior_outputs(
+            self,
+            expected_outputs: _tp.Dict[str, _meta.ModelOutputSchema],
+            prior_outputs: _tp.Dict[str, _meta.TagSelector],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
+            -> GraphSection:

-        …
-        data_spec = _data.DataSpec.create_data_spec(data_item, data_def, storage_def, schema_def)
+        nodes = dict()
+        outputs = set()

-        # Currently one item per input, since inputs are single part/delta
-        data_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
-        nodes[data_load_id] = LoadDataNode(data_load_id, spec=data_spec, explicit_deps=explicit_deps)
+        for output_name, output_schema in expected_outputs.items():

-        data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
-        nodes[data_view_id] = DataViewNode(data_view_id, schema_def, data_load_id)
-        outputs.add(data_view_id)
+            prior_selector = prior_outputs.get(output_name)

-        …
+            # Prior outputs are always optional
+            if prior_selector is None:
+                continue

-        …
-        …
+            if output_schema.objectType == _meta.ObjectType.DATA:
+                prior_spec = self._build_data_spec(prior_selector)
+            elif output_schema.objectType == _meta.ObjectType.FILE:
+                prior_spec = self._build_file_spec(prior_selector)
+            else:
+                self._error(_ex.EJobValidation(f"Invalid output type [{output_schema.objectType}] for output [{output_name}]"))
+                continue

-        …
-        …
-        …
+            prior_output_id = NodeId.of(f"{output_name}:PRIOR", self._job_namespace, _data.DataSpec)
+            nodes[prior_output_id] = StaticValueNode(prior_output_id, prior_spec, explicit_deps=explicit_deps)
+            outputs.add(prior_output_id)

-        file_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
-        nodes[file_view_id] = DataViewNode(file_view_id, None, file_load_id)
-        outputs.add(file_view_id)
+        return GraphSection(nodes, outputs=outputs)

     def build_job_outputs(
             self,
-            required_outputs: …
-            …
-            explicit_deps: …
+            required_outputs: _tp.Dict[str, _meta.ModelOutputSchema],
+            prior_outputs: _tp.Dict[str, _meta.TagSelector],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         nodes = {}
-        …
+        section_inputs = set()

-        for output_name, …
+        for output_name, output_schema in required_outputs.items():

             # Output data view must already exist in the namespace, it is an input to the save operation
             data_view_id = NodeId.of(output_name, self._job_namespace, _data.DataView)
-            …
+            section_inputs.add(data_view_id)

-            # …
-            …
-                if output_def.objectType == meta.ObjectType.OBJECT_TYPE_NOT_SET \
-                else output_def.objectType
+            # Check for prior outputs
+            prior_selector = prior_outputs.get(output_name)

-            …
+            if output_schema.objectType == _meta.ObjectType.DATA:
+                self._build_data_output(output_name, output_schema, data_view_id, prior_selector, nodes, explicit_deps)
+            elif output_schema.objectType == _meta.ObjectType.FILE:
+                self._build_file_output(output_name, output_schema, data_view_id, prior_selector, nodes, explicit_deps)
+            else:
+                self._error(_ex.EJobValidation(f"Invalid output type [{output_schema.objectType}] for input [{output_name}]"))

-            …
-                if output_def.optional:
-                    optional_info = "(configuration is required for all optional outputs, in case they are produced)"
-                    self._error(_ex.EJobValidation(f"Missing optional output: [{output_name}] {optional_info}"))
-                    continue
-                else:
-                    self._error(_ex.EJobValidation(f"Missing required output: [{output_name}]"))
-                    continue
+        return GraphSection(nodes, inputs=section_inputs)

-            …
-            self._build_data_output(output_name, output_selector, data_view_id, nodes, explicit_deps)
+    def _build_data_input(self, input_name, input_selector, nodes, outputs, explicit_deps):

-        …
-        …
+        data_spec = self._build_data_spec(input_selector)
+        data_spec = self._attach_metadata(data_spec, input_selector)

-        …
-        …
+        # Physical load of data items from disk
+        # Currently one item per input, since inputs are single part/delta
+        data_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
+        nodes[data_load_id] = LoadDataNode(data_load_id, spec=data_spec, explicit_deps=explicit_deps)

-        …
+        # Input views assembled by mapping one root part to each view
+        data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
+        nodes[data_view_id] = DataViewNode(data_view_id, data_spec.schema, data_load_id)
+        outputs.add(data_view_id)

-    def _build_data_output(self, output_name, …
+    def _build_data_output(self, output_name, output_schema, data_view_id, prior_selector, nodes, explicit_deps):

         # Map one data item from each view, since outputs are single part/delta
         data_item_id = NodeId(f"{output_name}:ITEM", self._job_namespace, _data.DataItem)
         nodes[data_item_id] = DataItemNode(data_item_id, data_view_id)

-        …
-        data_def = data_obj.data
-        storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
-
-        if data_def.schemaId:
-            schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
-        else:
-            schema_def = data_def.schema
-
-        root_part_opaque_key = 'part-root' # TODO: Central part names / constants
-        data_item = data_def.parts[root_part_opaque_key].snap.deltas[0].dataItem
-        data_spec = _data.DataSpec.create_data_spec(data_item, data_def, storage_def, schema_def)
-
-        # Create a physical save operation for the data item
-        data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, _data.DataSpec)
-        nodes[data_save_id] = SaveDataNode(data_save_id, data_item_id, spec=data_spec)
-
-        output_key = output_name
-        storage_key = output_name + ":STORAGE"
+        if prior_selector is None:
+            # New output - Allocate new TRAC object IDs
+            prior_spec = None
+            data_id = self._allocate_id(_meta.ObjectType.DATA)
+            storage_id = self._allocate_id(_meta.ObjectType.STORAGE)
         else:
+            # New version - Get the prior version metadata and bump the object IDs
+            prior_spec = self._build_data_spec(prior_selector)
+            data_id = _util.new_object_version(prior_spec.primary_id)
+            storage_id = _util.new_object_version(prior_spec.storage_id)

-        …
-        mapped_storage_key = output_name + ":STORAGE"
+        # Graph node ID for the save operation
+        data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, _data.DataSpec)

-        …
-        …
-        …
-        data_spec_id = …
-        nodes[data_spec_id] = DynamicDataSpecNode(
+        if output_schema.dynamic:
+
+            # For dynamic outputs, an extra graph node is needed to assemble the schema information
+            # This will call build_data_spec() at runtime, once the schema is known
+            data_spec_id = NodeId.of(f"{output_name}:DYNAMIC_SCHEMA", self._job_namespace, _data.DataSpec)
+            nodes[data_spec_id] = DataSpecNode(
                 data_spec_id, data_view_id,
-                data_id, storage_id,
-                …
+                data_id, storage_id, output_name,
+                self._sys_config,
+                prior_data_spec=prior_spec,
                 explicit_deps=explicit_deps)

-            # …
-            data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, _data.DataSpec)
+            # Save operation uses the dynamically produced schema info
             nodes[data_save_id] = SaveDataNode(data_save_id, data_item_id, spec_id=data_spec_id)

-        …
-        storage_key = _util.object_key(storage_id)
-
-        data_result_id = NodeId.of(f"{output_name}:RESULT", self._job_namespace, ObjectBundle)
-        nodes[data_result_id] = DataResultNode(
-            data_result_id, output_name, data_save_id,
-            data_key=output_key,
-            storage_key=storage_key)
-
-    def _build_file_output(self, output_name, output_def, output_selector, file_view_id, nodes, explicit_deps):
+        else:

-        …
-        …
+            # If the output is not dynamic, a data spec can be built ahead of time
+            data_spec = _data.build_data_spec(
+                data_id, storage_id, output_name,
+                output_schema.schema,
+                self._sys_config,
+                prior_spec=prior_spec)

-        …
+            # Save operation uses the statically produced schema info
+            nodes[data_save_id] = SaveDataNode(data_save_id, data_item_id, spec=data_spec)

-        …
+    def _build_data_spec(self, data_selector):

-        …
+        # Build a data spec using metadata from the job config
+        # For now we are always loading the root part, snap 0, delta 0
+        data_def = _util.get_job_metadata(data_selector, self._job_config).data
+        storage_def = _util.get_job_metadata(data_def.storageId, self._job_config).storage

-        …
-        …
+        if data_def.schemaId:
+            schema_def = _util.get_job_metadata(data_def.schemaId, self._job_config).schema
+        else:
+            schema_def = data_def.schema

-        …
-        …
+        root_part_opaque_key = 'part-root' # TODO: Central part names / constants
+        data_item = data_def.parts[root_part_opaque_key].snap.deltas[0].dataItem

-        …
+        data_id = _util.get_job_mapping(data_selector, self._job_config)
+        storage_id = _util.get_job_mapping(data_def.storageId, self._job_config)

-        …
+        return _data.DataSpec \
+            .create_data_spec(data_item, data_def, storage_def, schema_def) \
+            .with_ids(data_id, storage_id)

-        …
-        storage_id = self._job_config.resultMapping[mapped_storage_key]
+    def _build_file_input(self, input_name, input_selector, nodes, outputs, explicit_deps):

-        …
-        …
-        data_item = f"file/{output_id.objectId}/version-{output_id.objectVersion}"
-        storage_key = self._sys_config.storage.defaultBucket
-        storage_path = f"file/FILE-{output_id.objectId}/version-{output_id.objectVersion}/{output_name}.{file_type.extension}"
+        file_spec = self._build_file_spec(input_selector)
+        file_spec = self._attach_metadata(file_spec, input_selector)

-        …
-        …
+        file_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
+        nodes[file_load_id] = LoadDataNode(file_load_id, spec=file_spec, explicit_deps=explicit_deps)

-        …
-        …
+        # Input views assembled by mapping one root part to each view
+        file_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
+        nodes[file_view_id] = DataViewNode(file_view_id, None, file_load_id)
+        outputs.add(file_view_id)

-        …
+    def _build_file_output(self, output_name, output_schema, file_view_id, prior_selector, nodes, explicit_deps):

+        # Map file item from view
         file_item_id = NodeId(f"{output_name}:ITEM", self._job_namespace, _data.DataItem)
         nodes[file_item_id] = DataItemNode(file_item_id, file_view_id, explicit_deps=explicit_deps)

-        …
+        if prior_selector is None:
+            # New output - Allocate new TRAC object IDs
+            prior_spec = None
+            file_id = self._allocate_id(_meta.ObjectType.FILE)
+            storage_id = self._allocate_id(_meta.ObjectType.STORAGE)
+        else:
+            # New version - Get the prior version metadata and bump the object IDs
+            prior_spec = self._build_file_spec(prior_selector) if prior_selector else None
+            file_id = _util.new_object_version(prior_spec.primary_id)
+            storage_id = _util.new_object_version(prior_spec.storage_id)
+
+        # File spec can always be built ahead of time (no equivalent of dynamic schemas)
+        file_spec = _data.build_file_spec(
+            file_id, storage_id,
+            output_name, output_schema.fileType,
+            self._sys_config,
+            prior_spec=prior_spec)
+
+        # Graph node for the save operation
         file_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, _data.DataSpec)
         nodes[file_save_id] = SaveDataNode(file_save_id, file_item_id, spec=file_spec)

-        …
-        nodes[data_result_id] = DataResultNode(
-            data_result_id, output_name, file_save_id,
-            file_key=resolved_output_key,
-            storage_key=resolved_storage_key)
-
-    @classmethod
-    def build_output_file_and_storage(cls, output_key, file_type: meta.FileType, sys_config: cfg.RuntimeConfig, job_config: cfg.JobConfig):
-
-        # TODO: Review and de-dupe building of output metadata
-        # Responsibility for assigning outputs could perhaps move from orchestrator to runtime
-
-        output_storage_key = f"{output_key}:STORAGE"
-
-        output_id = job_config.resultMapping[output_key]
-        output_storage_id = job_config.resultMapping[output_storage_key]
-
-        timestamp = _dt.datetime.fromisoformat(output_id.objectTimestamp.isoDatetime)
-        data_item = f"file/{output_id.objectId}/version-{output_id.objectVersion}"
-        storage_key = sys_config.storage.defaultBucket
-        storage_path = f"file/FILE-{output_id.objectId}/version-{output_id.objectVersion}/{output_key}.{file_type.extension}"
-
-        file_def = cls.build_file_def(output_key, file_type, output_storage_id, data_item)
-        storage_def = cls.build_storage_def(data_item, storage_key, storage_path, file_type.mimeType, timestamp)
-
-        return file_def, storage_def
-
-    @classmethod
-    def build_runtime_outputs(cls, output_names: tp.List[str], job_namespace: NodeNamespace):
-
-        # This method is called dynamically during job execution
-        # So it cannot use stateful information like self._job_config or self._job_namespace
-
-        # TODO: Factor out common logic with regular job outputs (including static / dynamic)
-
-        nodes = {}
-        inputs = set()
-        outputs = list()
-
-        for output_name in output_names:
-
-            # Output data view must already exist in the namespace
-            data_view_id = NodeId.of(output_name, job_namespace, _data.DataView)
-            data_spec_id = NodeId.of(f"{output_name}:SPEC", job_namespace, _data.DataSpec)
-
-            mapped_output_key = output_name
-            mapped_storage_key = output_name + ":STORAGE"
-
-            data_id = _util.new_object_id(meta.ObjectType.DATA)
-            storage_id = _util.new_object_id(meta.ObjectType.STORAGE)
-
-            data_spec_node = DynamicDataSpecNode(
-                data_spec_id, data_view_id,
-                data_id, storage_id,
-                prior_data_spec=None)
-
-            output_key = _util.object_key(data_id)
-            storage_key = _util.object_key(storage_id)
-
-            # Map one data item from each view, since outputs are single part/delta
-            data_item_id = NodeId(f"{output_name}:ITEM", job_namespace, _data.DataItem)
-            data_item_node = DataItemNode(data_item_id, data_view_id)
-
-            # Create a physical save operation for the data item
-            data_save_id = NodeId.of(f"{output_name}:SAVE", job_namespace, _data.DataSpec)
-            data_save_node = SaveDataNode(data_save_id, data_item_id, spec_id=data_spec_id)
-
-            data_result_id = NodeId.of(f"{output_name}:RESULT", job_namespace, ObjectBundle)
-            data_result_node = DataResultNode(
-                data_result_id, output_name, data_save_id,
-                output_key, storage_key)
-
-            nodes[data_spec_id] = data_spec_node
-            nodes[data_item_id] = data_item_node
-            nodes[data_save_id] = data_save_node
-            nodes[data_result_id] = data_result_node
-
-            # Job-level data view is an input to the save operation
-            inputs.add(data_view_id)
-            outputs.append(data_result_id)
-
-        runtime_outputs = JobOutputs(bundles=outputs)
-        runtime_outputs_id = NodeId.of("trac_runtime_outputs", job_namespace, JobOutputs)
-        runtime_outputs_node = RuntimeOutputsNode(runtime_outputs_id, runtime_outputs)
-
-        nodes[runtime_outputs_id] = runtime_outputs_node
-
-        return GraphSection(nodes, inputs=inputs, outputs={runtime_outputs_id})
-
-    @classmethod
-    def build_file_def(cls, file_name, file_type, storage_id, data_item):
-
-        file_def = meta.FileDefinition()
-        file_def.name = f"{file_name}.{file_type.extension}"
-        file_def.extension = file_type.extension
-        file_def.mimeType = file_type.mimeType
-        file_def.storageId = _util.selector_for_latest(storage_id)
-        file_def.dataItem = data_item
-        file_def.size = 0
-
-        return file_def
-
-    @classmethod
-    def build_storage_def(
-            cls, data_item: str,
-            storage_key, storage_path, storage_format,
-            timestamp: _dt.datetime):
-
-        first_incarnation = 0
-
-        storage_copy = meta.StorageCopy(
-            storage_key, storage_path, storage_format,
-            copyStatus=meta.CopyStatus.COPY_AVAILABLE,
-            copyTimestamp=meta.DatetimeValue(timestamp.isoformat()))
-
-        storage_incarnation = meta.StorageIncarnation(
-            [storage_copy],
-            incarnationIndex=first_incarnation,
-            incarnationTimestamp=meta.DatetimeValue(timestamp.isoformat()),
-            incarnationStatus=meta.IncarnationStatus.INCARNATION_AVAILABLE)
-
-        storage_item = meta.StorageItem([storage_incarnation])
-
-        storage_def = meta.StorageDefinition()
-        storage_def.dataItems[data_item] = storage_item
-
-        return storage_def
+    def _build_file_spec(self, file_selector):

-        …
-        …
-        objects: tp.Dict[str, NodeId[meta.ObjectDefinition]] = None,
-        bundles: tp.List[NodeId[ObjectBundle]] = None,
-        explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
-        -> GraphSection:
-
-        result_id = self._job_config.resultMapping.get("trac_job_result")
-        result_node_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
-
-        if objects is not None:
-
-            results_inputs = set(objects.values())
-
-            build_result_node = BuildJobResultNode(
-                result_node_id, result_id, self._job_config.jobId,
-                outputs=JobOutputs(objects=objects),
-                explicit_deps=explicit_deps)
-
-        elif bundles is not None:
-
-            results_inputs = set(bundles)
-
-            build_result_node = BuildJobResultNode(
-                result_node_id, result_id, self._job_config.jobId,
-                outputs=JobOutputs(bundles=bundles),
-                explicit_deps=explicit_deps)
-
-        else:
-            raise _ex.EUnexpected()
+        file_def = _util.get_job_metadata(file_selector, self._job_config).file
+        storage_def = _util.get_job_metadata(file_def.storageId, self._job_config).storage

-        …
+        file_id = _util.get_job_mapping(file_selector, self._job_config)
+        storage_id = _util.get_job_mapping(file_def.storageId, self._job_config)

-        return …
+        return _data.DataSpec \
+            .create_file_spec(file_def.dataItem, file_def, storage_def) \
+            .with_ids(file_id, storage_id)

     def build_model_or_flow_with_context(
             self, namespace: NodeNamespace, model_or_flow_name: str,
-            job_def: …
-            input_mapping: …
-            explicit_deps: …
+            job_def: _meta.JobDefinition, model_or_flow: _meta.ObjectDefinition,
+            input_mapping: _tp.Dict[str, NodeId], output_mapping: _tp.Dict[str, NodeId],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         # Generate a name for a new unique sub-context
@@ -772,32 +651,35 @@ class GraphBuilder:

     def build_model_or_flow(
             self, namespace: NodeNamespace,
-            job_def: …
-            model_or_flow: …
-            explicit_deps: …
+            job_def: _meta.JobDefinition,
+            model_or_flow: _meta.ObjectDefinition,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

-        if model_or_flow.objectType == …
+        if model_or_flow.objectType == _meta.ObjectType.MODEL:
             return self.build_model(namespace, job_def, model_or_flow.model, explicit_deps)

-        elif model_or_flow.objectType == …
+        elif model_or_flow.objectType == _meta.ObjectType.FLOW:
             return self.build_flow(namespace, job_def, model_or_flow.flow)

         else:
             message = f"Invalid job config, expected model or flow, got [{model_or_flow.objectType}]"
             self._error(_ex.EJobValidation(message))

+            # Allow building to continue for better error reporting
+            return GraphSection(dict())
+
     def build_model(
             self, namespace: NodeNamespace,
-            job_def: …
-            model_def: …
-            explicit_deps: …
+            job_def: _meta.JobDefinition,
+            model_def: _meta.ModelDefinition,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         self.check_model_type(job_def, model_def)

         def param_id(node_name):
-            return NodeId(node_name, namespace, …
+            return NodeId(node_name, namespace, _meta.Value)

         def data_id(node_name):
             return NodeId(node_name, namespace, _data.DataView)
@@ -808,9 +690,9 @@ class GraphBuilder:
         output_ids = set(map(data_id, model_def.outputs))

         # Set up storage access for import / export data jobs
-        if job_def.jobType == …
+        if job_def.jobType == _meta.JobType.IMPORT_DATA:
             storage_access = job_def.importData.storageAccess
-        elif job_def.jobType == …
+        elif job_def.jobType == _meta.JobType.EXPORT_DATA:
             storage_access = job_def.exportData.storageAccess
         else:
             storage_access = None
@@ -827,16 +709,19 @@ class GraphBuilder:
         model_name = model_def.entryPoint.split(".")[-1] # TODO: Check unique model name
         model_id = NodeId(model_name, namespace, Bundle[_data.DataView])

+        # Used to set up a dynamic builder at runtime if dynamic graph updates are needed
+        context = GraphContext(
+            self._job_config.jobId,
+            self._job_namespace, namespace,
+            self._sys_config)
+
         model_node = RunModelNode(
-            model_id, …
+            model_id, model_def, model_scope,
             frozenset(parameter_ids), frozenset(input_ids),
             explicit_deps=explicit_deps, bundle=model_id.namespace,
-            storage_access=storage_access)
+            storage_access=storage_access, graph_context=context)

-        …
-        model_result_node = RunModelResultNode(model_result_id, model_id)
-
-        nodes = {model_id: model_node, model_result_id: model_result_node}
+        nodes = {model_id: model_node}

         # Create nodes for each model output
         # The model node itself outputs a bundle (dictionary of named outputs)
@@ -849,13 +734,13 @@ class GraphBuilder:
             nodes[output_id] = BundleItemNode(output_id, model_id, output_id.name)

         # Assemble a graph to include the model and its outputs
-        return GraphSection(nodes, inputs={*parameter_ids, *input_ids}, outputs=output_ids, must_run=[…
+        return GraphSection(nodes, inputs={*parameter_ids, *input_ids}, outputs=output_ids, must_run=[model_id])

     def build_flow(
             self, namespace: NodeNamespace,
-            job_def: …
-            flow_def: …
-            explicit_deps: …
+            job_def: _meta.JobDefinition,
+            flow_def: _meta.FlowDefinition,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         def socket_key(socket):
@@ -875,7 +760,7 @@ class GraphBuilder:
         target_edges = {socket_key(edge.target): edge for edge in flow_def.edges}

         # Initially parameters and inputs are reachable, everything else is not
-        def is_input(n): return n[1].nodeType in […
+        def is_input(n): return n[1].nodeType in [_meta.FlowNodeType.PARAMETER_NODE, _meta.FlowNodeType.INPUT_NODE]
         reachable_nodes = dict(filter(is_input, flow_def.nodes.items()))
         remaining_nodes = dict(filter(lambda n: not is_input(n), flow_def.nodes.items()))

@@ -892,7 +777,7 @@ class GraphBuilder:

             graph_section = self._join_sections(graph_section, sub_section, allow_partial_inputs=True)

-            if node.nodeType != …
+            if node.nodeType != _meta.FlowNodeType.OUTPUT_NODE:

                 source_edges = remaining_edges_by_source.pop(node_name)

@@ -916,10 +801,10 @@ class GraphBuilder:

     def build_flow_node(
             self, namespace: NodeNamespace,
-            job_def: …
-            target_edges: …
-            node_name: str, node: …
-            explicit_deps: …
+            job_def: _meta.JobDefinition,
+            target_edges: _tp.Dict[_meta.FlowSocket, _meta.FlowEdge],
+            node_name: str, node: _meta.FlowNode,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         def socket_key(socket):
@@ -930,27 +815,27 @@ class GraphBuilder:
             return NodeId(socket_name, namespace, result_type)

         def edge_mapping(node_: str, socket_: str = None, result_type=None):
-            socket = socket_key(…
+            socket = socket_key(_meta.FlowSocket(node_, socket_))
             edge = target_edges.get(socket)
             # Report missing edges as a job consistency error (this might happen sometimes in dev mode)
             if edge is None:
                 self._error(_ex.EJobValidation(f"Inconsistent flow: Socket [{socket}] is not connected"))
             return socket_id(edge.source.node, edge.source.socket, result_type)

-        if node.nodeType == …
-            return GraphSection({}, inputs={NodeId(node_name, namespace, result_type=…
+        if node.nodeType == _meta.FlowNodeType.PARAMETER_NODE:
+            return GraphSection({}, inputs={NodeId(node_name, namespace, result_type=_meta.Value)})

-        if node.nodeType == …
+        if node.nodeType == _meta.FlowNodeType.INPUT_NODE:
             return GraphSection({}, inputs={NodeId(node_name, namespace, result_type=_data.DataView)})

-        if node.nodeType == …
+        if node.nodeType == _meta.FlowNodeType.OUTPUT_NODE:
             target_id = NodeId(node_name, namespace, result_type=_data.DataView)
             source_id = edge_mapping(node_name, None, _data.DataView)
             return GraphSection({target_id: IdentityNode(target_id, source_id)}, outputs={target_id})

-        if node.nodeType == …
+        if node.nodeType == _meta.FlowNodeType.MODEL_NODE:

-            param_mapping = {socket: edge_mapping(node_name, socket, …
+            param_mapping = {socket: edge_mapping(node_name, socket, _meta.Value) for socket in node.parameters}
             input_mapping = {socket: edge_mapping(node_name, socket, _data.DataView) for socket in node.inputs}
             output_mapping = {socket: socket_id(node_name, socket, _data.DataView) for socket in node.outputs}

@@ -958,10 +843,10 @@ class GraphBuilder:
             pop_mapping = output_mapping

             model_selector = job_def.runFlow.models.get(node_name)
-            model_obj = _util.…
+            model_obj = _util.get_job_metadata(model_selector, self._job_config)

             # Missing models in the job config is a job consistency error
-            if model_obj is None or model_obj.objectType != …
+            if model_obj is None or model_obj.objectType != _meta.ObjectType.MODEL:
                 self._error(_ex.EJobValidation(f"No model was provided for flow node [{node_name}]"))

             # Explicit check for model compatibility - report an error now, do not try build_model()
@@ -976,9 +861,12 @@ class GraphBuilder:

         self._error(_ex.EJobValidation(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]"))

+        # Allow building to continue for better error reporting
+        return GraphSection(dict())
+
     def check_model_compatibility(
-            self, model_selector: …
-            model_def: …
+            self, model_selector: _meta.TagSelector,
+            model_def: _meta.ModelDefinition, node_name: str, flow_node: _meta.FlowNode):

         model_params = list(sorted(model_def.parameters.keys()))
         model_inputs = list(sorted(model_def.inputs.keys()))
@@ -992,14 +880,14 @@ class GraphBuilder:
             model_key = _util.object_key(model_selector)
             self._error(_ex.EJobValidation(f"Incompatible model for flow node [{node_name}] (Model: [{model_key}])"))

-    def check_model_type(self, job_def: …
+    def check_model_type(self, job_def: _meta.JobDefinition, model_def: _meta.ModelDefinition):

-        if job_def.jobType == …
-            allowed_model_types = […
-        elif job_def.jobType == …
-            allowed_model_types = […
+        if job_def.jobType == _meta.JobType.IMPORT_DATA:
+            allowed_model_types = [_meta.ModelType.DATA_IMPORT_MODEL]
+        elif job_def.jobType == _meta.JobType.EXPORT_DATA:
+            allowed_model_types = [_meta.ModelType.DATA_EXPORT_MODEL]
         else:
-            allowed_model_types = […
+            allowed_model_types = [_meta.ModelType.STANDARD_MODEL]

         if model_def.modelType not in allowed_model_types:
             job_type = job_def.jobType.name
@@ -1008,8 +896,8 @@ class GraphBuilder:

     @staticmethod
     def build_context_push(
-            namespace: NodeNamespace, input_mapping: …
-            explicit_deps: …
+            namespace: NodeNamespace, input_mapping: _tp.Dict[str, NodeId],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         """
@@ -1021,7 +909,7 @@ class GraphBuilder:
             for input_name, outer_id
             in input_mapping.items()}

-        push_id = NodeId("trac_ctx_push", namespace, Bundle[…
+        push_id = NodeId("trac_ctx_push", namespace, Bundle[_tp.Any])
         push_node = ContextPushNode(push_id, namespace, push_mapping, explicit_deps, bundle=push_id.namespace)

         nodes = {push_id: push_node}
@@ -1038,8 +926,8 @@ class GraphBuilder:

     @staticmethod
     def build_context_pop(
-            namespace: NodeNamespace, output_mapping: …
-            explicit_deps: …
+            namespace: NodeNamespace, output_mapping: _tp.Dict[str, NodeId],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         """
@@ -1051,8 +939,14 @@ class GraphBuilder:
             for output_name, outer_id
             in output_mapping.items()}

-        …
-        …
+        push_id = NodeId("trac_ctx_push", namespace, Bundle[_tp.Any])
+        explicit_deps = [push_id, *explicit_deps] if explicit_deps else [push_id]
+
+        pop_id = NodeId("trac_ctx_pop", namespace, Bundle[_tp.Any])
+        pop_node = ContextPopNode(
+            pop_id, namespace, pop_mapping,
+            explicit_deps=explicit_deps,
+            bundle=pop_id.namespace.parent)

         nodes = {pop_id: pop_node}

@@ -1066,6 +960,91 @@ class GraphBuilder:
             outputs={*pop_mapping.values()},
             must_run=[pop_id])

+    def build_job_result(
+            self, output_ids: _tp.List[NodeId[JOB_OUTPUT_TYPE]],
+            output_keys: _tp.Optional[_tp.Dict[NodeId, str]] = None,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
+            -> GraphSection:
+
+        if output_keys:
+            named_outputs = dict((output_keys[oid], oid) for oid in filter(lambda oid: oid in output_keys, output_ids))
+            unnamed_outputs = list(filter(lambda oid: oid not in output_keys, output_ids))
+        else:
+            named_outputs = dict()
+            unnamed_outputs = output_ids
+
+        result_node_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
+        result_node = JobResultNode(
+            result_node_id,
+            self._job_config.jobId,
+            self._job_config.resultId,
+            named_outputs, unnamed_outputs,
+            explicit_deps=explicit_deps)
+
+        result_nodes = {result_node_id: result_node}
+
+        return GraphSection(result_nodes, inputs=set(output_ids), must_run=[result_node_id])
+
+    def build_dynamic_outputs(self, source_id: NodeId, output_names: _tp.List[str]) -> GraphUpdate:
+
+        nodes = dict()
+        dependencies = dict()
+
+        # All dynamic outputs are DATA with dynamic schemas for now
+        dynamic_schema = _meta.ModelOutputSchema(
+            objectType=_meta.ObjectType.DATA,
+            schema=None, dynamic=True)
+
+        for output_name in output_names:
+
+            # Node to extract dynamic outputs from the source node (a model or flow output bundle)
+            output_id = NodeId.of(output_name, source_id.namespace, _data.DataView)
+            output_node = BundleItemNode(output_id, source_id, output_name)
+            nodes[output_id] = output_node
+
+            # All dynamic outputs are DATA for now
+            self._build_data_output(output_name, dynamic_schema, output_id, prior_selector=None, nodes=nodes,
+                explicit_deps=[source_id])
+
+        named_outputs = dict(
+            (nid.name, nid) for nid, n in nodes.items()
+            if nid.result_type == GraphOutput or isinstance(n, SaveDataNode))
+
+        dynamic_outputs_id = NodeId.of("trac_dynamic_outputs", source_id.namespace, DynamicOutputsNode)
+        dynamic_outputs_node = DynamicOutputsNode(
+            dynamic_outputs_id, named_outputs,
+            explicit_deps=[source_id])
+
+        job_result_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
+
+        nodes[dynamic_outputs_id] = dynamic_outputs_node
+        dependencies[job_result_id] = [Dependency(dynamic_outputs_id, DependencyType.HARD)]
+
+        return GraphUpdate(nodes, dependencies)
+
+    def _allocate_id(self, object_type: _meta.ObjectType):
+
+        preallocated_ids = self._preallocated_ids.get(object_type)
+
+        if preallocated_ids:
+            # Preallocated IDs have objectVersion = 0, use a new version to get objectVersion = 1
+            return _util.new_object_version(preallocated_ids.pop())
+        else:
+            return _util.new_object_id(object_type)
+
+    def _attach_metadata(self, obj: _tp.Any, selector: _meta.TagSelector):
+
+        item_id = _util.get_job_mapping(selector, self._job_config)
+        tag = _util.get_job_metadata_tag(selector, self._job_config, optional=True)
+
+        attributes = dict() if tag is None else dict(
+            (attr_name, _type_system.MetadataCodec.decode_value(attr_value))
+            for attr_name, attr_value in tag.attrs.items())
+
+        metadata = _api.RuntimeMetadata(objectId=item_id, attributes=attributes)
+
+        return _util.attach_runtime_metadata(obj, metadata)
+
     def _join_sections(self, *sections: GraphSection, allow_partial_inputs: bool = False):

         n_sections = len(sections)
@@ -1097,7 +1076,7 @@ class GraphBuilder:

         return GraphSection(nodes, inputs, last_section.outputs, must_run)

-    def _invalid_graph_error(self, missing_dependencies: …
+    def _invalid_graph_error(self, missing_dependencies: _tp.Iterable[NodeId]):

         missing_ids = ", ".join(map(self._missing_item_display_name, missing_dependencies))
         message = f"The execution graph has unsatisfied dependencies: [{missing_ids}]"
```