tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. tracdap/rt/_impl/core/config_parser.py +29 -3
  2. tracdap/rt/_impl/core/data.py +627 -40
  3. tracdap/rt/_impl/core/repos.py +17 -8
  4. tracdap/rt/_impl/core/storage.py +25 -13
  5. tracdap/rt/_impl/core/struct.py +254 -60
  6. tracdap/rt/_impl/core/util.py +125 -11
  7. tracdap/rt/_impl/exec/context.py +35 -8
  8. tracdap/rt/_impl/exec/dev_mode.py +169 -127
  9. tracdap/rt/_impl/exec/engine.py +203 -140
  10. tracdap/rt/_impl/exec/functions.py +228 -263
  11. tracdap/rt/_impl/exec/graph.py +141 -126
  12. tracdap/rt/_impl/exec/graph_builder.py +428 -449
  13. tracdap/rt/_impl/grpc/codec.py +8 -13
  14. tracdap/rt/_impl/grpc/server.py +7 -7
  15. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
  16. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
  17. tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
  18. tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
  19. tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
  20. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +37 -35
  21. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
  22. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
  23. tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
  24. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
  25. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
  26. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
  27. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
  28. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
  29. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
  30. tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
  31. tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
  32. tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
  33. tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
  34. tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
  35. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +23 -19
  36. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
  37. tracdap/rt/_impl/runtime.py +3 -9
  38. tracdap/rt/_impl/static_api.py +5 -6
  39. tracdap/rt/_plugins/format_csv.py +2 -2
  40. tracdap/rt/_plugins/repo_git.py +56 -11
  41. tracdap/rt/_plugins/storage_aws.py +165 -150
  42. tracdap/rt/_plugins/storage_azure.py +17 -11
  43. tracdap/rt/_plugins/storage_gcp.py +35 -18
  44. tracdap/rt/_version.py +1 -1
  45. tracdap/rt/api/model_api.py +45 -0
  46. tracdap/rt/config/__init__.py +7 -9
  47. tracdap/rt/config/common.py +3 -14
  48. tracdap/rt/config/job.py +17 -3
  49. tracdap/rt/config/platform.py +9 -32
  50. tracdap/rt/config/result.py +8 -4
  51. tracdap/rt/config/runtime.py +5 -10
  52. tracdap/rt/config/tenant.py +28 -0
  53. tracdap/rt/launch/cli.py +0 -8
  54. tracdap/rt/launch/launch.py +1 -3
  55. tracdap/rt/metadata/__init__.py +35 -35
  56. tracdap/rt/metadata/data.py +19 -31
  57. tracdap/rt/metadata/job.py +3 -1
  58. tracdap/rt/metadata/storage.py +9 -0
  59. tracdap/rt/metadata/type.py +9 -5
  60. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +5 -3
  61. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +64 -63
  62. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
  63. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
  64. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
tracdap/rt/_impl/exec/graph_builder.py
@@ -13,29 +13,41 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import datetime as _dt
+import itertools as _itr
+import typing as _tp
 
-import tracdap.rt.config as config
+import tracdap.rt.metadata as _meta
+import tracdap.rt.config as _cfg
 import tracdap.rt.exceptions as _ex
 import tracdap.rt._impl.core.data as _data
+import tracdap.rt._impl.core.type_system as _type_system
 import tracdap.rt._impl.core.util as _util
+import tracdap.rt.api as _api
 
 from .graph import *
 
 
 class GraphBuilder:
 
-    __JOB_DETAILS = tp.TypeVar(
+    __JOB_DETAILS = _tp.TypeVar(
         "__JOB_DETAILS",
-        meta.RunModelJob,
-        meta.RunFlowJob,
-        meta.ImportModelJob,
-        meta.ImportDataJob,
-        meta.ExportDataJob)
+        _meta.RunModelJob,
+        _meta.RunFlowJob,
+        _meta.ImportModelJob,
+        _meta.ImportDataJob,
+        _meta.ExportDataJob)
 
-    __JOB_BUILD_FUNC = tp.Callable[[meta.JobDefinition, NodeId], GraphSection]
+    __JOB_BUILD_FUNC = _tp.Callable[[_meta.JobDefinition, NodeId], GraphSection]
 
-    def __init__(self, sys_config: config.RuntimeConfig, job_config: config.JobConfig):
+    @classmethod
+    def dynamic(cls, context: GraphContext) -> "GraphBuilder":
+
+        sys_config = context.sys_config
+        job_config = _cfg.JobConfig(context.job_id)
+
+        return GraphBuilder(sys_config, job_config)
+
+    def __init__(self, sys_config: _cfg.RuntimeConfig, job_config: _cfg.JobConfig):
 
         self._sys_config = sys_config
         self._job_config = job_config
@@ -43,80 +55,97 @@ class GraphBuilder:
         self._job_key = _util.object_key(job_config.jobId)
         self._job_namespace = NodeNamespace(self._job_key)
 
-        self._errors = []
+        # Dictionary of object type to preallocated IDs
+        self._preallocated_ids = dict(
+            (k, list(v)) for k, v in _itr.groupby(
+                sorted(job_config.preallocatedIds, key=lambda oid: oid.objectType.value),
+                lambda oid: oid.objectType))
+
+        self._errors = list()
+
+    def unallocated_ids(self) -> _tp.Dict[_meta.ObjectType, _meta.TagHeader]:
+        return self._preallocated_ids
 
-    def _child_builder(self, job_id: meta.TagHeader) -> "GraphBuilder":
+    def _child_builder(self, job_id: _meta.TagHeader) -> "GraphBuilder":
 
         builder = GraphBuilder(self._sys_config, self._job_config)
         builder._job_key = _util.object_key(job_id)
         builder._job_namespace = NodeNamespace(builder._job_key)
 
+        # Do not share preallocated IDs with the child graph
+        builder._preallocated_ids = dict()
+
         return builder
 
-    def build_job(self, job_def: meta.JobDefinition,) -> Graph:
+    def build_job(self, job_def: _meta.JobDefinition, ) -> Graph:
 
         try:
 
-            if job_def.jobType == meta.JobType.IMPORT_MODEL:
-                return self.build_standard_job(job_def, self.build_import_model_job)
+            if job_def.jobType == _meta.JobType.IMPORT_MODEL:
+                graph = self.build_standard_job(job_def, self.build_import_model_job)
+
+            elif job_def.jobType == _meta.JobType.RUN_MODEL:
+                graph = self.build_standard_job(job_def, self.build_run_model_job)
 
-            if job_def.jobType == meta.JobType.RUN_MODEL:
-                return self.build_standard_job(job_def, self.build_run_model_job)
+            elif job_def.jobType == _meta.JobType.RUN_FLOW:
+                graph = self.build_standard_job(job_def, self.build_run_flow_job)
 
-            if job_def.jobType == meta.JobType.RUN_FLOW:
-                return self.build_standard_job(job_def, self.build_run_flow_job)
+            elif job_def.jobType in [_meta.JobType.IMPORT_DATA, _meta.JobType.EXPORT_DATA]:
+                graph = self.build_standard_job(job_def, self.build_import_export_data_job)
 
-            if job_def.jobType in [meta.JobType.IMPORT_DATA, meta.JobType.EXPORT_DATA]:
-                return self.build_standard_job(job_def, self.build_import_export_data_job)
+            elif job_def.jobType == _meta.JobType.JOB_GROUP:
+                graph = self.build_standard_job(job_def, self.build_job_group)
 
-            if job_def.jobType == meta.JobType.JOB_GROUP:
-                return self.build_standard_job(job_def, self.build_job_group)
+            else:
+                self._error(_ex.EJobValidation(f"Job type [{job_def.jobType.name}] is not supported yet"))
+                raise self._error_summary()
 
-            self._error(_ex.EJobValidation(f"Job type [{job_def.jobType.name}] is not supported yet"))
+            if any(self._errors):
+                raise self._error_summary()
+            else:
+                return graph
 
         except Exception as e:
 
            # If there are recorded, errors, assume unhandled exceptions are a result of those
            # Only report the recorded errors, to reduce noise
            if any(self._errors):
-                pass
+                raise self._error_summary()
 
            # If no errors are recorded, an exception here would be a bug
            raise _ex.ETracInternal(f"Unexpected error preparing the job execution graph") from e
 
-        finally:
-
-            if any(self._errors):
+    def _error_summary(self) -> Exception:
 
-                if len(self._errors) == 1:
-                    raise self._errors[0]
-                else:
-                    err_text = "\n".join(map(str, self._errors))
-                    raise _ex.EJobValidation("Invalid job configuration\n" + err_text)
+        if len(self._errors) == 1:
+            return self._errors[0]
+        else:
+            err_text = "\n".join(map(str, self._errors))
+            return _ex.EJobValidation("Invalid job configuration\n" + err_text)
 
-    def build_standard_job(self, job_def: meta.JobDefinition, build_func: __JOB_BUILD_FUNC):
+    def build_standard_job(self, job_def: _meta.JobDefinition, build_func: __JOB_BUILD_FUNC):
 
        # Set up the job context
 
-        push_id = NodeId("trac_job_push", self._job_namespace, Bundle[tp.Any])
+        push_id = NodeId("trac_job_push", self._job_namespace, Bundle[_tp.Any])
         push_node = ContextPushNode(push_id, self._job_namespace)
         push_section = GraphSection({push_id: push_node}, must_run=[push_id])
 
        # Build the execution graphs for the main job and results recording
 
         main_section = build_func(job_def, push_id)
-        main_result_id = NodeId.of("trac_job_result", self._job_namespace, config.JobResult)
+        main_result_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
 
        # Clean up the job context
 
-        global_result_id = NodeId.of(self._job_key, NodeNamespace.root(), config.JobResult)
+        global_result_id = NodeId.of(self._job_key, NodeNamespace.root(), _cfg.JobResult)
 
-        pop_id = NodeId("trac_job_pop", self._job_namespace, Bundle[tp.Any])
+        pop_id = NodeId("trac_job_pop", self._job_namespace, Bundle[_tp.Any])
         pop_mapping = {main_result_id: global_result_id}
 
         pop_node = ContextPopNode(
             pop_id, self._job_namespace, pop_mapping,
-            explicit_deps=main_section.must_run,
+            explicit_deps=[push_id, *main_section.must_run],
             bundle=NodeNamespace.root())
 
         global_result_node = BundleItemNode(global_result_id, pop_id, self._job_key)
@@ -129,42 +158,41 @@ class GraphBuilder:
 
         return Graph(job.nodes, global_result_id)
 
-    def build_import_model_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
+    def build_import_model_job(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
 
-        # Main section: run the model import
+        # TRAC object ID for the new model
+        model_id = self._allocate_id(_meta.ObjectType.MODEL)
 
-        # TODO: Import model job should pre-allocate an ID, then model ID comes from job_config.resultMapping
-        new_model_id = _util.new_object_id(meta.ObjectType.MODEL)
-        new_model_key = _util.object_key(new_model_id)
-
-        model_scope = self._job_key
         import_details = job_def.importModel
+        import_scope = self._job_key
 
-        import_id = NodeId.of("trac_import_model", self._job_namespace, meta.ObjectDefinition)
-        import_node = ImportModelNode(import_id, model_scope, import_details, explicit_deps=[job_push_id])
+        # Graph node ID for the import operation
+        import_id = NodeId.of("trac_import_model", self._job_namespace, GraphOutput)
 
-        main_section = GraphSection(nodes={import_id: import_node})
+        import_node = ImportModelNode(
+            import_id, model_id,
+            import_details, import_scope,
+            explicit_deps=[job_push_id])
 
-        # Build job-level metadata outputs
+        main_section = GraphSection(nodes={import_id: import_node})
 
-        result_section = self.build_job_results(
-            objects={new_model_key: import_id},
-            explicit_deps=[job_push_id, *main_section.must_run])
+        # RESULT will have a single (unnamed) output
+        result_section = self.build_job_result([import_id], explicit_deps=[job_push_id, *main_section.must_run])
 
         return self._join_sections(main_section, result_section)
 
-    def build_import_export_data_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
+    def build_import_export_data_job(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
 
        # TODO: These are processed as regular calculation jobs for now
        # That might be ok, but is worth reviewing
 
-        if job_def.jobType == meta.JobType.IMPORT_DATA:
+        if job_def.jobType == _meta.JobType.IMPORT_DATA:
             job_details = job_def.importData
         else:
             job_details = job_def.exportData
 
         target_selector = job_details.model
-        target_obj = _util.get_job_resource(target_selector, self._job_config)
+        target_obj = _util.get_job_metadata(target_selector, self._job_config)
         target_def = target_obj.model
 
         return self.build_calculation_job(
@@ -172,12 +200,12 @@ class GraphBuilder:
             target_selector, target_def,
             job_details)
 
-    def build_run_model_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
+    def build_run_model_job(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
 
         job_details = job_def.runModel
 
         target_selector = job_details.model
-        target_obj = _util.get_job_resource(target_selector, self._job_config)
+        target_obj = _util.get_job_metadata(target_selector, self._job_config)
         target_def = target_obj.model
 
         return self.build_calculation_job(
@@ -185,12 +213,12 @@ class GraphBuilder:
             target_selector, target_def,
             job_details)
 
-    def build_run_flow_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
+    def build_run_flow_job(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
 
         job_details = job_def.runFlow
 
         target_selector = job_details.flow
-        target_obj = _util.get_job_resource(target_selector, self._job_config)
+        target_obj = _util.get_job_metadata(target_selector, self._job_config)
         target_def = target_obj.flow
 
         return self.build_calculation_job(
@@ -198,21 +226,21 @@ class GraphBuilder:
             target_selector, target_def,
             job_details)
 
-    def build_job_group(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
+    def build_job_group(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
 
         job_group = job_def.jobGroup
 
-        if job_group.jobGroupType == meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
+        if job_group.jobGroupType == _meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
             return self.build_sequential_job_group(job_group, job_push_id)
 
-        if job_group.jobGroupType == meta.JobGroupType.PARALLEL_JOB_GROUP:
+        if job_group.jobGroupType == _meta.JobGroupType.PARALLEL_JOB_GROUP:
             return self.build_parallel_job_group(job_group, job_push_id)
 
         else:
             self._error(_ex.EJobValidation(f"Job group type [{job_group.jobGroupType.name}] is not supported yet"))
             return GraphSection(dict(), inputs={job_push_id})
 
-    def build_sequential_job_group(self, job_group: meta.JobGroup, job_push_id: NodeId) -> GraphSection:
+    def build_sequential_job_group(self, job_group: _meta.JobGroup, job_push_id: NodeId) -> GraphSection:
 
         nodes = dict()
         prior_id = job_push_id
@@ -225,14 +253,14 @@ class GraphBuilder:
             prior_id = child_node.id
 
        # No real results from job groups yet (they cannot be executed from the platform)
-        job_result = cfg.JobResult()
-        result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
+        job_result = _cfg.JobResult()
+        result_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
         result_node = StaticValueNode(result_id, job_result, explicit_deps=[prior_id])
         nodes[result_id] = result_node
 
         return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})
 
-    def build_parallel_job_group(self, job_group: meta.JobGroup, job_push_id: NodeId) -> GraphSection:
+    def build_parallel_job_group(self, job_group: _meta.JobGroup, job_push_id: NodeId) -> GraphSection:
 
         nodes = dict()
         parallel_ids = [job_push_id]
@@ -245,22 +273,22 @@ class GraphBuilder:
             parallel_ids.append(child_node.id)
 
        # No real results from job groups yet (they cannot be executed from the platform)
-        job_result = cfg.JobResult()
-        result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
+        job_result = _cfg.JobResult()
+        result_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
         result_node = StaticValueNode(result_id, job_result, explicit_deps=parallel_ids)
         nodes[result_id] = result_node
 
         return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})
 
-    def build_child_job(self, child_job_def: meta.JobDefinition, explicit_deps) -> Node[config.JobResult]:
+    def build_child_job(self, child_job_def: _meta.JobDefinition, explicit_deps) -> Node[_cfg.JobResult]:
 
-        child_job_id = _util.new_object_id(meta.ObjectType.JOB)
+        child_job_id = self._allocate_id(_meta.ObjectType.JOB)
 
         child_builder = self._child_builder(child_job_id)
         child_graph = child_builder.build_job(child_job_def)
 
         child_node_name = _util.object_key(child_job_id)
-        child_node_id = NodeId.of(child_node_name, self._job_namespace, cfg.JobResult)
+        child_node_id = NodeId.of(child_node_name, self._job_namespace, _cfg.JobResult)
 
         child_node = ChildJobNode(
             child_node_id, child_job_id, child_job_def,
@@ -269,9 +297,9 @@ class GraphBuilder:
         return child_node
 
     def build_calculation_job(
-            self, job_def: meta.JobDefinition, job_push_id: NodeId,
-            target_selector: meta.TagSelector,
-            target_def: tp.Union[meta.ModelDefinition, meta.FlowDefinition],
+            self, job_def: _meta.JobDefinition, job_push_id: NodeId,
+            target_selector: _meta.TagSelector,
+            target_def: _tp.Union[_meta.ModelDefinition, _meta.FlowDefinition],
             job_details: __JOB_DETAILS) \
             -> GraphSection:
 
@@ -282,11 +310,11 @@ class GraphBuilder:
 
         required_params = target_def.parameters
         required_inputs = target_def.inputs
-        required_outputs = target_def.outputs
+        expected_outputs = target_def.outputs
 
         provided_params = job_details.parameters
         provided_inputs = job_details.inputs
-        provided_outputs = job_details.outputs
+        prior_outputs = job_details.priorOutputs
 
         params_section = self.build_job_parameters(
             required_params, provided_params,
@@ -296,36 +324,48 @@ class GraphBuilder:
             required_inputs, provided_inputs,
             explicit_deps=[job_push_id])
 
+        prior_outputs_section = self.build_job_prior_outputs(
+            expected_outputs, prior_outputs,
+            explicit_deps=[job_push_id])
+
         exec_namespace = self._job_namespace
-        exec_obj = _util.get_job_resource(target_selector, self._job_config)
+        exec_obj = _util.get_job_metadata(target_selector, self._job_config)
 
         exec_section = self.build_model_or_flow(
             exec_namespace, job_def, exec_obj,
             explicit_deps=[job_push_id])
 
         output_section = self.build_job_outputs(
-            required_outputs, provided_outputs,
+            expected_outputs, prior_outputs,
             explicit_deps=[job_push_id])
 
-        main_section = self._join_sections(params_section, input_section, exec_section, output_section)
+        main_section = self._join_sections(
+            params_section, input_section, prior_outputs_section,
+            exec_section, output_section)
 
        # Build job-level metadata outputs
 
-        data_result_ids = list(
+        output_ids = list(
             nid for nid, n in main_section.nodes.items()
-            if isinstance(n, DataResultNode))
+            if nid.result_type == GraphOutput or isinstance(n, SaveDataNode))
+
+        # Map the SAVE nodes to their corresponding named output keys
+        output_keys = dict(
+            (nid, nid.name.replace(":SAVE", ""))
+            for nid, n in output_section.nodes.items()
+            if isinstance(n, SaveDataNode))
 
-        result_section = self.build_job_results(
-            bundles=data_result_ids,
+        result_section = self.build_job_result(
+            output_ids, output_keys,
             explicit_deps=[job_push_id, *main_section.must_run])
 
         return self._join_sections(main_section, result_section)
 
     def build_job_parameters(
             self,
-            required_params: tp.Dict[str, meta.ModelParameter],
-            supplied_params: tp.Dict[str, meta.Value],
-            explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
+            required_params: _tp.Dict[str, _meta.ModelParameter],
+            supplied_params: _tp.Dict[str, _meta.Value],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:
 
         nodes = dict()
@@ -341,7 +381,7 @@ class GraphBuilder:
                 self._error(_ex.EJobValidation(f"Missing required parameter: [{param_name}]"))
                 continue
 
-            param_id = NodeId(param_name, self._job_namespace, meta.Value)
+            param_id = NodeId(param_name, self._job_namespace, _meta.Value)
             param_node = StaticValueNode(param_id, param_def, explicit_deps=explicit_deps)
 
             nodes[param_id] = param_node
@@ -350,402 +390,241 @@ class GraphBuilder:
 
     def build_job_inputs(
             self,
-            required_inputs: tp.Dict[str, meta.ModelInputSchema],
-            supplied_inputs: tp.Dict[str, meta.TagSelector],
-            explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
+            required_inputs: _tp.Dict[str, _meta.ModelInputSchema],
+            supplied_inputs: _tp.Dict[str, _meta.TagSelector],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:
 
         nodes = dict()
         outputs = set()
 
-        for input_name, input_def in required_inputs.items():
-
-            # Backwards compatibility with pre 0.8 versions
-            input_type = meta.ObjectType.DATA \
-                if input_def.objectType == meta.ObjectType.OBJECT_TYPE_NOT_SET \
-                else input_def.objectType
+        for input_name, input_schema in required_inputs.items():
 
             input_selector = supplied_inputs.get(input_name)
 
             if input_selector is None:
 
-                if input_def.optional:
+                if input_schema.optional:
                     data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
-                    data_view = _data.DataView.create_empty(input_type)
+                    data_view = _data.DataView.create_empty(input_schema.objectType)
                     nodes[data_view_id] = StaticValueNode(data_view_id, data_view, explicit_deps=explicit_deps)
                     outputs.add(data_view_id)
                 else:
                     self._error(_ex.EJobValidation(f"Missing required input: [{input_name}]"))
 
-            elif input_type == meta.ObjectType.DATA:
-                self._build_data_input(input_name, input_selector, nodes, outputs, explicit_deps)
+                continue
 
-            elif input_type == meta.ObjectType.FILE:
+            if input_schema.objectType == _meta.ObjectType.DATA:
+                self._build_data_input(input_name, input_selector, nodes, outputs, explicit_deps)
+            elif input_schema.objectType == _meta.ObjectType.FILE:
                 self._build_file_input(input_name, input_selector, nodes, outputs, explicit_deps)
-
             else:
-                self._error(_ex.EJobValidation(f"Invalid input type [{input_type.name}] for input [{input_name}]"))
+                self._error(_ex.EJobValidation(f"Invalid input type [{input_schema.objectType}] for input [{input_name}]"))
 
         return GraphSection(nodes, outputs=outputs)
 
-    def _build_data_input(self, input_name, input_selector, nodes, outputs, explicit_deps):
-
-        # Build a data spec using metadata from the job config
-        # For now we are always loading the root part, snap 0, delta 0
-        data_def = _util.get_job_resource(input_selector, self._job_config).data
-        storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
-
-        if data_def.schemaId:
-            schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
-        else:
-            schema_def = data_def.schema
+    def build_job_prior_outputs(
+            self,
+            expected_outputs: _tp.Dict[str, _meta.ModelOutputSchema],
+            prior_outputs: _tp.Dict[str, _meta.TagSelector],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
+            -> GraphSection:
 
-        root_part_opaque_key = 'part-root'  # TODO: Central part names / constants
-        data_item = data_def.parts[root_part_opaque_key].snap.deltas[0].dataItem
-        data_spec = _data.DataSpec.create_data_spec(data_item, data_def, storage_def, schema_def)
+        nodes = dict()
+        outputs = set()
 
-        # Physical load of data items from disk
-        # Currently one item per input, since inputs are single part/delta
-        data_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
-        nodes[data_load_id] = LoadDataNode(data_load_id, spec=data_spec, explicit_deps=explicit_deps)
+        for output_name, output_schema in expected_outputs.items():
 
-        # Input views assembled by mapping one root part to each view
-        data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
-        nodes[data_view_id] = DataViewNode(data_view_id, schema_def, data_load_id)
-        outputs.add(data_view_id)
+            prior_selector = prior_outputs.get(output_name)
 
-    def _build_file_input(self, input_name, input_selector, nodes, outputs, explicit_deps):
+            # Prior outputs are always optional
+            if prior_selector is None:
+                continue
 
-        file_def = _util.get_job_resource(input_selector, self._job_config).file
-        storage_def = _util.get_job_resource(file_def.storageId, self._job_config).storage
+            if output_schema.objectType == _meta.ObjectType.DATA:
+                prior_spec = self._build_data_spec(prior_selector)
+            elif output_schema.objectType == _meta.ObjectType.FILE:
+                prior_spec = self._build_file_spec(prior_selector)
+            else:
+                self._error(_ex.EJobValidation(f"Invalid output type [{output_schema.objectType}] for output [{output_name}]"))
+                continue
 
-        file_spec = _data.DataSpec.create_file_spec(file_def.dataItem, file_def, storage_def)
-        file_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
-        nodes[file_load_id] = LoadDataNode(file_load_id, spec=file_spec, explicit_deps=explicit_deps)
+            prior_output_id = NodeId.of(f"{output_name}:PRIOR", self._job_namespace, _data.DataSpec)
+            nodes[prior_output_id] = StaticValueNode(prior_output_id, prior_spec, explicit_deps=explicit_deps)
+            outputs.add(prior_output_id)
 
-        # Input views assembled by mapping one root part to each view
-        file_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
-        nodes[file_view_id] = DataViewNode(file_view_id, None, file_load_id)
-        outputs.add(file_view_id)
+        return GraphSection(nodes, outputs=outputs)
 
     def build_job_outputs(
             self,
-            required_outputs: tp.Dict[str, meta.ModelOutputSchema],
-            supplied_outputs: tp.Dict[str, meta.TagSelector],
-            explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
+            required_outputs: _tp.Dict[str, _meta.ModelOutputSchema],
+            prior_outputs: _tp.Dict[str, _meta.TagSelector],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:
 
         nodes = {}
-        inputs = set()
+        section_inputs = set()
 
-        for output_name, output_def in required_outputs.items():
+        for output_name, output_schema in required_outputs.items():
 
            # Output data view must already exist in the namespace, it is an input to the save operation
             data_view_id = NodeId.of(output_name, self._job_namespace, _data.DataView)
-            inputs.add(data_view_id)
+            section_inputs.add(data_view_id)
 
-            # Backwards compatibility with pre 0.8 versions
-            output_type = meta.ObjectType.DATA \
-                if output_def.objectType == meta.ObjectType.OBJECT_TYPE_NOT_SET \
-                else output_def.objectType
+            # Check for prior outputs
+            prior_selector = prior_outputs.get(output_name)
 
-            output_selector = supplied_outputs.get(output_name)
+            if output_schema.objectType == _meta.ObjectType.DATA:
+                self._build_data_output(output_name, output_schema, data_view_id, prior_selector, nodes, explicit_deps)
+            elif output_schema.objectType == _meta.ObjectType.FILE:
+                self._build_file_output(output_name, output_schema, data_view_id, prior_selector, nodes, explicit_deps)
+            else:
+                self._error(_ex.EJobValidation(f"Invalid output type [{output_schema.objectType}] for input [{output_name}]"))
 
-            if output_selector is None:
-                if output_def.optional:
-                    optional_info = "(configuration is required for all optional outputs, in case they are produced)"
-                    self._error(_ex.EJobValidation(f"Missing optional output: [{output_name}] {optional_info}"))
-                    continue
-                else:
-                    self._error(_ex.EJobValidation(f"Missing required output: [{output_name}]"))
-                    continue
+        return GraphSection(nodes, inputs=section_inputs)
 
-            elif output_type == meta.ObjectType.DATA:
-                self._build_data_output(output_name, output_selector, data_view_id, nodes, explicit_deps)
+    def _build_data_input(self, input_name, input_selector, nodes, outputs, explicit_deps):
 
-            elif output_type == meta.ObjectType.FILE:
-                self._build_file_output(output_name, output_def, output_selector, data_view_id, nodes, explicit_deps)
+        data_spec = self._build_data_spec(input_selector)
+        data_spec = self._attach_metadata(data_spec, input_selector)
 
-            else:
-                self._error(_ex.EJobValidation(f"Invalid output type [{output_type.name}] for input [{output_name}]"))
+        # Physical load of data items from disk
+        # Currently one item per input, since inputs are single part/delta
+        data_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
+        nodes[data_load_id] = LoadDataNode(data_load_id, spec=data_spec, explicit_deps=explicit_deps)
 
-        return GraphSection(nodes, inputs=inputs)
+        # Input views assembled by mapping one root part to each view
+        data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
+        nodes[data_view_id] = DataViewNode(data_view_id, data_spec.schema, data_load_id)
+        outputs.add(data_view_id)
 
-    def _build_data_output(self, output_name, output_selector, data_view_id, nodes, explicit_deps):
+    def _build_data_output(self, output_name, output_schema, data_view_id, prior_selector, nodes, explicit_deps):
 
        # Map one data item from each view, since outputs are single part/delta
         data_item_id = NodeId(f"{output_name}:ITEM", self._job_namespace, _data.DataItem)
         nodes[data_item_id] = DataItemNode(data_item_id, data_view_id)
 
-        data_obj = _util.get_job_resource(output_selector, self._job_config, optional=True)
-
-        if data_obj is not None:
-
-            # If data def for the output has been built in advance, use a static data spec
-
-            data_def = data_obj.data
-            storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
-
-            if data_def.schemaId:
-                schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
-            else:
-                schema_def = data_def.schema
-
-            root_part_opaque_key = 'part-root'  # TODO: Central part names / constants
-            data_item = data_def.parts[root_part_opaque_key].snap.deltas[0].dataItem
-            data_spec = _data.DataSpec.create_data_spec(data_item, data_def, storage_def, schema_def)
-
-            # Create a physical save operation for the data item
-            data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, _data.DataSpec)
-            nodes[data_save_id] = SaveDataNode(data_save_id, data_item_id, spec=data_spec)
-
-            output_key = output_name
-            storage_key = output_name + ":STORAGE"
-
+        if prior_selector is None:
+            # New output - Allocate new TRAC object IDs
+            prior_spec = None
+            data_id = self._allocate_id(_meta.ObjectType.DATA)
+            storage_id = self._allocate_id(_meta.ObjectType.STORAGE)
         else:
+            # New version - Get the prior version metadata and bump the object IDs
+            prior_spec = self._build_data_spec(prior_selector)
+            data_id = _util.new_object_version(prior_spec.primary_id)
+            storage_id = _util.new_object_version(prior_spec.storage_id)
 
-            # If output data def for an output was not supplied in the job, create a dynamic data spec
-            # Dynamic data def will always use an embedded schema (this is no ID for an external schema)
+        # Graph node ID for the save operation
+        data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, _data.DataSpec)
 
-            mapped_output_key = output_name
-            mapped_storage_key = output_name + ":STORAGE"
+        if output_schema.dynamic:
 
-            data_id = self._job_config.resultMapping[mapped_output_key]
-            storage_id = self._job_config.resultMapping[mapped_storage_key]
-
-            data_spec_id = NodeId.of(f"{output_name}:SPEC", self._job_namespace, _data.DataSpec)
-            nodes[data_spec_id] = DynamicDataSpecNode(
+            # For dynamic outputs, an extra graph node is needed to assemble the schema information
+            # This will call build_data_spec() at runtime, once the schema is known
+            data_spec_id = NodeId.of(f"{output_name}:DYNAMIC_SCHEMA", self._job_namespace, _data.DataSpec)
+            nodes[data_spec_id] = DataSpecNode(
                 data_spec_id, data_view_id,
-                data_id, storage_id,
-                prior_data_spec=None,
+                data_id, storage_id, output_name,
+                self._sys_config,
+                prior_data_spec=prior_spec,
                 explicit_deps=explicit_deps)
 
-            # Create a physical save operation for the data item
-            data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, _data.DataSpec)
+            # Save operation uses the dynamically produced schema info
             nodes[data_save_id] = SaveDataNode(data_save_id, data_item_id, spec_id=data_spec_id)
 
-            output_key = _util.object_key(data_id)
-            storage_key = _util.object_key(storage_id)
-
-        data_result_id = NodeId.of(f"{output_name}:RESULT", self._job_namespace, ObjectBundle)
-        nodes[data_result_id] = DataResultNode(
-            data_result_id, output_name, data_save_id,
-            data_key=output_key,
-            storage_key=storage_key)
-
-    def _build_file_output(self, output_name, output_def, output_selector, file_view_id, nodes, explicit_deps):
+        else:
 
-        mapped_output_key = output_name
-        mapped_storage_key = output_name + ":STORAGE"
+            # If the output is not dynamic, a data spec can be built ahead of time
+            data_spec = _data.build_data_spec(
+                data_id, storage_id, output_name,
+                output_schema.schema,
+                self._sys_config,
+                prior_spec=prior_spec)
 
-        file_obj = _util.get_job_resource(output_selector, self._job_config, optional=True)
+            # Save operation uses the statically produced schema info
+            nodes[data_save_id] = SaveDataNode(data_save_id, data_item_id, spec=data_spec)
 
-        if file_obj is not None:
+    def _build_data_spec(self, data_selector):
 
-            # Definitions already exist (generated by dev mode translator)
+        # Build a data spec using metadata from the job config
+        # For now we are always loading the root part, snap 0, delta 0
+        data_def = _util.get_job_metadata(data_selector, self._job_config).data
+        storage_def = _util.get_job_metadata(data_def.storageId, self._job_config).storage
 
-            file_def = _util.get_job_resource(output_selector, self._job_config).file
-            storage_def = _util.get_job_resource(file_def.storageId, self._job_config).storage
+        if data_def.schemaId:
+            schema_def = _util.get_job_metadata(data_def.schemaId, self._job_config).schema
+        else:
+            schema_def = data_def.schema
 
-            resolved_output_key = mapped_output_key
-            resolved_storage_key = mapped_storage_key
+        root_part_opaque_key = 'part-root'  # TODO: Central part names / constants
+        data_item = data_def.parts[root_part_opaque_key].snap.deltas[0].dataItem
 
-        else:
+        data_id = _util.get_job_mapping(data_selector, self._job_config)
+        storage_id = _util.get_job_mapping(data_def.storageId, self._job_config)
 
-            # Create new definitions (default behavior for jobs sent from the platform)
+        return _data.DataSpec \
+            .create_data_spec(data_item, data_def, storage_def, schema_def) \
+            .with_ids(data_id, storage_id)
 
-            output_id = self._job_config.resultMapping[mapped_output_key]
-            storage_id = self._job_config.resultMapping[mapped_storage_key]
+    def _build_file_input(self, input_name, input_selector, nodes, outputs, explicit_deps):
 
-            file_type = output_def.fileType
-            timestamp = _dt.datetime.fromisoformat(output_id.objectTimestamp.isoDatetime)
-            data_item = f"file/{output_id.objectId}/version-{output_id.objectVersion}"
-            storage_key = self._sys_config.storage.defaultBucket
-            storage_path = f"file/FILE-{output_id.objectId}/version-{output_id.objectVersion}/{output_name}.{file_type.extension}"
+        file_spec = self._build_file_spec(input_selector)
+        file_spec = self._attach_metadata(file_spec, input_selector)
 
-            file_def = self.build_file_def(output_name, file_type, storage_id, data_item)
-            storage_def = self.build_storage_def(data_item, storage_key, storage_path, file_type.mimeType, timestamp)
+        file_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
+        nodes[file_load_id] = LoadDataNode(file_load_id, spec=file_spec, explicit_deps=explicit_deps)
 
-            resolved_output_key = _util.object_key(output_id)
-            resolved_storage_key = _util.object_key(storage_id)
+        # Input views assembled by mapping one root part to each view
+        file_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
+        nodes[file_view_id] = DataViewNode(file_view_id, None, file_load_id)
+        outputs.add(file_view_id)
 
-        # Required object defs are available, now build the graph nodes
+    def _build_file_output(self, output_name, output_schema, file_view_id, prior_selector, nodes, explicit_deps):
 
+        # Map file item from view
         file_item_id = NodeId(f"{output_name}:ITEM", self._job_namespace, _data.DataItem)
         nodes[file_item_id] = DataItemNode(file_item_id, file_view_id, explicit_deps=explicit_deps)
 
-        file_spec = _data.DataSpec.create_file_spec(file_def.dataItem, file_def, storage_def)
+        if prior_selector is None:
+            # New output - Allocate new TRAC object IDs
+            prior_spec = None
+            file_id = self._allocate_id(_meta.ObjectType.FILE)
+            storage_id = self._allocate_id(_meta.ObjectType.STORAGE)
+        else:
+            # New version - Get the prior version metadata and bump the object IDs
+            prior_spec = self._build_file_spec(prior_selector) if prior_selector else None
+            file_id = _util.new_object_version(prior_spec.primary_id)
+            storage_id = _util.new_object_version(prior_spec.storage_id)
+
+        # File spec can always be built ahead of time (no equivalent of dynamic schemas)
+        file_spec = _data.build_file_spec(
+            file_id, storage_id,
+            output_name, output_schema.fileType,
+            self._sys_config,
+            prior_spec=prior_spec)
+
+        # Graph node for the save operation
         file_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, _data.DataSpec)
         nodes[file_save_id] = SaveDataNode(file_save_id, file_item_id, spec=file_spec)
 
-        data_result_id = NodeId.of(f"{output_name}:RESULT", self._job_namespace, ObjectBundle)
-        nodes[data_result_id] = DataResultNode(
-            data_result_id, output_name, file_save_id,
-            file_key=resolved_output_key,
-            storage_key=resolved_storage_key)
-
-    @classmethod
-    def build_output_file_and_storage(cls, output_key, file_type: meta.FileType, sys_config: cfg.RuntimeConfig, job_config: cfg.JobConfig):
-
-        # TODO: Review and de-dupe building of output metadata
-        # Responsibility for assigning outputs could perhaps move from orchestrator to runtime
-
-        output_storage_key = f"{output_key}:STORAGE"
-
-        output_id = job_config.resultMapping[output_key]
-        output_storage_id = job_config.resultMapping[output_storage_key]
-
-        timestamp = _dt.datetime.fromisoformat(output_id.objectTimestamp.isoDatetime)
-        data_item = f"file/{output_id.objectId}/version-{output_id.objectVersion}"
-        storage_key = sys_config.storage.defaultBucket
-        storage_path = f"file/FILE-{output_id.objectId}/version-{output_id.objectVersion}/{output_key}.{file_type.extension}"
-
-        file_def = cls.build_file_def(output_key, file_type, output_storage_id, data_item)
-        storage_def = cls.build_storage_def(data_item, storage_key, storage_path, file_type.mimeType, timestamp)
-
-        return file_def, storage_def
-
-    @classmethod
-    def build_runtime_outputs(cls, output_names: tp.List[str], job_namespace: NodeNamespace):
-
-        # This method is called dynamically during job execution
-        # So it cannot use stateful information like self._job_config or self._job_namespace
-
-        # TODO: Factor out common logic with regular job outputs (including static / dynamic)
-
-        nodes = {}
-        inputs = set()
-        outputs = list()
-
-        for output_name in output_names:
-
-            # Output data view must already exist in the namespace
-            data_view_id = NodeId.of(output_name, job_namespace, _data.DataView)
-            data_spec_id = NodeId.of(f"{output_name}:SPEC", job_namespace, _data.DataSpec)
-
-            mapped_output_key = output_name
-            mapped_storage_key = output_name + ":STORAGE"
-
-            data_id = _util.new_object_id(meta.ObjectType.DATA)
-            storage_id = _util.new_object_id(meta.ObjectType.STORAGE)
-
-            data_spec_node = DynamicDataSpecNode(
-                data_spec_id, data_view_id,
-                data_id, storage_id,
-                prior_data_spec=None)
-
-            output_key = _util.object_key(data_id)
-            storage_key = _util.object_key(storage_id)
-
-            # Map one data item from each view, since outputs are single part/delta
-            data_item_id = NodeId(f"{output_name}:ITEM", job_namespace, _data.DataItem)
-            data_item_node = DataItemNode(data_item_id, data_view_id)
-
-            # Create a physical save operation for the data item
-            data_save_id = NodeId.of(f"{output_name}:SAVE", job_namespace, _data.DataSpec)
-            data_save_node = SaveDataNode(data_save_id, data_item_id, spec_id=data_spec_id)
-
-            data_result_id = NodeId.of(f"{output_name}:RESULT", job_namespace, ObjectBundle)
-            data_result_node = DataResultNode(
-                data_result_id, output_name, data_save_id,
-                output_key, storage_key)
-
-            nodes[data_spec_id] = data_spec_node
-            nodes[data_item_id] = data_item_node
-            nodes[data_save_id] = data_save_node
-            nodes[data_result_id] = data_result_node
-
-            # Job-level data view is an input to the save operation
-            inputs.add(data_view_id)
-            outputs.append(data_result_id)
-
-        runtime_outputs = JobOutputs(bundles=outputs)
-        runtime_outputs_id = NodeId.of("trac_runtime_outputs", job_namespace, JobOutputs)
-        runtime_outputs_node = RuntimeOutputsNode(runtime_outputs_id, runtime_outputs)
-
-        nodes[runtime_outputs_id] = runtime_outputs_node
-
-        return GraphSection(nodes, inputs=inputs, outputs={runtime_outputs_id})
-
-    @classmethod
-    def build_file_def(cls, file_name, file_type, storage_id, data_item):
-
-        file_def = meta.FileDefinition()
-        file_def.name = f"{file_name}.{file_type.extension}"
-        file_def.extension = file_type.extension
-        file_def.mimeType = file_type.mimeType
-        file_def.storageId = _util.selector_for_latest(storage_id)
-        file_def.dataItem = data_item
-        file_def.size = 0
-
-        return file_def
-
-    @classmethod
-    def build_storage_def(
-            cls, data_item: str,
-            storage_key, storage_path, storage_format,
-            timestamp: _dt.datetime):
-
-        first_incarnation = 0
-
-        storage_copy = meta.StorageCopy(
-            storage_key, storage_path, storage_format,
-            copyStatus=meta.CopyStatus.COPY_AVAILABLE,
-            copyTimestamp=meta.DatetimeValue(timestamp.isoformat()))
-
-        storage_incarnation = meta.StorageIncarnation(
-            [storage_copy],
-            incarnationIndex=first_incarnation,
-            incarnationTimestamp=meta.DatetimeValue(timestamp.isoformat()),
-            incarnationStatus=meta.IncarnationStatus.INCARNATION_AVAILABLE)
-
-        storage_item = meta.StorageItem([storage_incarnation])
-
-        storage_def = meta.StorageDefinition()
-        storage_def.dataItems[data_item] = storage_item
-
-        return storage_def
+    def _build_file_spec(self, file_selector):
 
-    def build_job_results(
-            self,
-            objects: tp.Dict[str, NodeId[meta.ObjectDefinition]] = None,
-            bundles: tp.List[NodeId[ObjectBundle]] = None,
-            explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
-            -> GraphSection:
-
-        result_id = self._job_config.resultMapping.get("trac_job_result")
-        result_node_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
-
-        if objects is not None:
-
-            results_inputs = set(objects.values())
-
-            build_result_node = BuildJobResultNode(
-                result_node_id, result_id, self._job_config.jobId,
-                outputs=JobOutputs(objects=objects),
-                explicit_deps=explicit_deps)
-
-        elif bundles is not None:
-
-            results_inputs = set(bundles)
-
-            build_result_node = BuildJobResultNode(
-                result_node_id, result_id, self._job_config.jobId,
-                outputs=JobOutputs(bundles=bundles),
-                explicit_deps=explicit_deps)
-
-        else:
-            raise _ex.EUnexpected()
+        file_def = _util.get_job_metadata(file_selector, self._job_config).file
+        storage_def = _util.get_job_metadata(file_def.storageId, self._job_config).storage
 
-        result_nodes = {result_node_id: build_result_node}
+        file_id = _util.get_job_mapping(file_selector, self._job_config)
+        storage_id = _util.get_job_mapping(file_def.storageId, self._job_config)
 
-        return GraphSection(result_nodes, inputs=results_inputs, must_run=[result_node_id])
+        return _data.DataSpec \
+            .create_file_spec(file_def.dataItem, file_def, storage_def) \
+            .with_ids(file_id, storage_id)
 
     def build_model_or_flow_with_context(
             self, namespace: NodeNamespace, model_or_flow_name: str,
-            job_def: meta.JobDefinition, model_or_flow: meta.ObjectDefinition,
-            input_mapping: tp.Dict[str, NodeId], output_mapping: tp.Dict[str, NodeId],
-            explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
+            job_def: _meta.JobDefinition, model_or_flow: _meta.ObjectDefinition,
+            input_mapping: _tp.Dict[str, NodeId], output_mapping: _tp.Dict[str, NodeId],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:
 
        # Generate a name for a new unique sub-context
@@ -772,32 +651,35 @@ class GraphBuilder:
 
     def build_model_or_flow(
             self, namespace: NodeNamespace,
-            job_def: meta.JobDefinition,
-            model_or_flow: meta.ObjectDefinition,
-            explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
+            job_def: _meta.JobDefinition,
+            model_or_flow: _meta.ObjectDefinition,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:
 
-        if model_or_flow.objectType == meta.ObjectType.MODEL:
+        if model_or_flow.objectType == _meta.ObjectType.MODEL:
             return self.build_model(namespace, job_def, model_or_flow.model, explicit_deps)
 
-        elif model_or_flow.objectType == meta.ObjectType.FLOW:
+        elif model_or_flow.objectType == _meta.ObjectType.FLOW:
             return self.build_flow(namespace, job_def, model_or_flow.flow)
 
         else:
             message = f"Invalid job config, expected model or flow, got [{model_or_flow.objectType}]"
             self._error(_ex.EJobValidation(message))
 
+        # Allow building to continue for better error reporting
+        return GraphSection(dict())
+
     def build_model(
             self, namespace: NodeNamespace,
-            job_def: meta.JobDefinition,
-            model_def: meta.ModelDefinition,
-            explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
+            job_def: _meta.JobDefinition,
+            model_def: _meta.ModelDefinition,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:
 
         self.check_model_type(job_def, model_def)
 
         def param_id(node_name):
-            return NodeId(node_name, namespace, meta.Value)
+            return NodeId(node_name, namespace, _meta.Value)
 
         def data_id(node_name):
             return NodeId(node_name, namespace, _data.DataView)
@@ -808,9 +690,9 @@ class GraphBuilder:
         output_ids = set(map(data_id, model_def.outputs))
 
        # Set up storage access for import / export data jobs
-        if job_def.jobType == meta.JobType.IMPORT_DATA:
+        if job_def.jobType == _meta.JobType.IMPORT_DATA:
             storage_access = job_def.importData.storageAccess
-        elif job_def.jobType == meta.JobType.EXPORT_DATA:
+        elif job_def.jobType == _meta.JobType.EXPORT_DATA:
             storage_access = job_def.exportData.storageAccess
         else:
             storage_access = None
@@ -827,16 +709,19 @@ class GraphBuilder:
         model_name = model_def.entryPoint.split(".")[-1]  # TODO: Check unique model name
         model_id = NodeId(model_name, namespace, Bundle[_data.DataView])
 
+        # Used to set up a dynamic builder at runtime if dynamic graph updates are needed
+        context = GraphContext(
+            self._job_config.jobId,
+            self._job_namespace, namespace,
+            self._sys_config)
+
         model_node = RunModelNode(
-            model_id, model_scope, model_def,
+            model_id, model_def, model_scope,
             frozenset(parameter_ids), frozenset(input_ids),
             explicit_deps=explicit_deps, bundle=model_id.namespace,
-            storage_access=storage_access)
+            storage_access=storage_access, graph_context=context)
 
-        model_result_id = NodeId(f"{model_name}:RESULT", namespace)
-        model_result_node = RunModelResultNode(model_result_id, model_id)
-
-        nodes = {model_id: model_node, model_result_id: model_result_node}
+        nodes = {model_id: model_node}
 
        # Create nodes for each model output
        # The model node itself outputs a bundle (dictionary of named outputs)
@@ -849,13 +734,13 @@ class GraphBuilder:
             nodes[output_id] = BundleItemNode(output_id, model_id, output_id.name)
 
        # Assemble a graph to include the model and its outputs
-        return GraphSection(nodes, inputs={*parameter_ids, *input_ids}, outputs=output_ids, must_run=[model_result_id])
+        return GraphSection(nodes, inputs={*parameter_ids, *input_ids}, outputs=output_ids, must_run=[model_id])
 
     def build_flow(
             self, namespace: NodeNamespace,
-            job_def: meta.JobDefinition,
-            flow_def: meta.FlowDefinition,
-            explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
+            job_def: _meta.JobDefinition,
+            flow_def: _meta.FlowDefinition,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:
 
         def socket_key(socket):
@@ -875,7 +760,7 @@ class GraphBuilder:
         target_edges = {socket_key(edge.target): edge for edge in flow_def.edges}
 
        # Initially parameters and inputs are reachable, everything else is not
-        def is_input(n): return n[1].nodeType in [meta.FlowNodeType.PARAMETER_NODE, meta.FlowNodeType.INPUT_NODE]
+        def is_input(n): return n[1].nodeType in [_meta.FlowNodeType.PARAMETER_NODE, _meta.FlowNodeType.INPUT_NODE]
         reachable_nodes = dict(filter(is_input, flow_def.nodes.items()))
         remaining_nodes = dict(filter(lambda n: not is_input(n), flow_def.nodes.items()))
 
@@ -892,7 +777,7 @@ class GraphBuilder:
 
             graph_section = self._join_sections(graph_section, sub_section, allow_partial_inputs=True)
 
-            if node.nodeType != meta.FlowNodeType.OUTPUT_NODE:
+            if node.nodeType != _meta.FlowNodeType.OUTPUT_NODE:
 
                 source_edges = remaining_edges_by_source.pop(node_name)
 
@@ -916,10 +801,10 @@ class GraphBuilder:
 
     def build_flow_node(
             self, namespace: NodeNamespace,
-            job_def: meta.JobDefinition,
-            target_edges: tp.Dict[meta.FlowSocket, meta.FlowEdge],
-            node_name: str, node: meta.FlowNode,
-            explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
+            job_def: _meta.JobDefinition,
+            target_edges: _tp.Dict[_meta.FlowSocket, _meta.FlowEdge],
+            node_name: str, node: _meta.FlowNode,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:
 
         def socket_key(socket):
@@ -930,27 +815,27 @@ class GraphBuilder:
  return NodeId(socket_name, namespace, result_type)

  def edge_mapping(node_: str, socket_: str = None, result_type=None):
- socket = socket_key(meta.FlowSocket(node_, socket_))
+ socket = socket_key(_meta.FlowSocket(node_, socket_))
  edge = target_edges.get(socket)
  # Report missing edges as a job consistency error (this might happen sometimes in dev mode)
  if edge is None:
  self._error(_ex.EJobValidation(f"Inconsistent flow: Socket [{socket}] is not connected"))
  return socket_id(edge.source.node, edge.source.socket, result_type)

- if node.nodeType == meta.FlowNodeType.PARAMETER_NODE:
- return GraphSection({}, inputs={NodeId(node_name, namespace, result_type=meta.Value)})
+ if node.nodeType == _meta.FlowNodeType.PARAMETER_NODE:
+ return GraphSection({}, inputs={NodeId(node_name, namespace, result_type=_meta.Value)})

- if node.nodeType == meta.FlowNodeType.INPUT_NODE:
+ if node.nodeType == _meta.FlowNodeType.INPUT_NODE:
  return GraphSection({}, inputs={NodeId(node_name, namespace, result_type=_data.DataView)})

- if node.nodeType == meta.FlowNodeType.OUTPUT_NODE:
+ if node.nodeType == _meta.FlowNodeType.OUTPUT_NODE:
  target_id = NodeId(node_name, namespace, result_type=_data.DataView)
  source_id = edge_mapping(node_name, None, _data.DataView)
  return GraphSection({target_id: IdentityNode(target_id, source_id)}, outputs={target_id})

- if node.nodeType == meta.FlowNodeType.MODEL_NODE:
+ if node.nodeType == _meta.FlowNodeType.MODEL_NODE:

- param_mapping = {socket: edge_mapping(node_name, socket, meta.Value) for socket in node.parameters}
+ param_mapping = {socket: edge_mapping(node_name, socket, _meta.Value) for socket in node.parameters}
  input_mapping = {socket: edge_mapping(node_name, socket, _data.DataView) for socket in node.inputs}
  output_mapping = {socket: socket_id(node_name, socket, _data.DataView) for socket in node.outputs}

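Note: edge_mapping resolves each input socket of a node to the NodeId of whatever feeds it, by looking up the flow edge whose target is that socket. A hypothetical illustration of the lookup, assuming socket keys are a simple join of node and socket names (the real socket_key is not shown in this hunk, so the key format here is an assumption):

    # Assumed key format: "node.socket", or just "node" for single-socket nodes
    def socket_key(node, socket=None):
        return f"{node}.{socket}" if socket else node

    # target socket key -> (source node, source socket)
    target_edges = {"model_a.customer_data": ("customer_data_input", None)}

    source = target_edges.get(socket_key("model_a", "customer_data"))
    assert source == ("customer_data_input", None)  # becomes the source NodeId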
@@ -958,10 +843,10 @@ class GraphBuilder:
  pop_mapping = output_mapping

  model_selector = job_def.runFlow.models.get(node_name)
- model_obj = _util.get_job_resource(model_selector, self._job_config)
+ model_obj = _util.get_job_metadata(model_selector, self._job_config)

  # Missing models in the job config is a job consistency error
- if model_obj is None or model_obj.objectType != meta.ObjectType.MODEL:
+ if model_obj is None or model_obj.objectType != _meta.ObjectType.MODEL:
  self._error(_ex.EJobValidation(f"No model was provided for flow node [{node_name}]"))

  # Explicit check for model compatibility - report an error now, do not try build_model()
@@ -976,9 +861,12 @@ class GraphBuilder:

  self._error(_ex.EJobValidation(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]"))

+ # Allow building to continue for better error reporting
+ return GraphSection(dict())
+
  def check_model_compatibility(
- self, model_selector: meta.TagSelector,
- model_def: meta.ModelDefinition, node_name: str, flow_node: meta.FlowNode):
+ self, model_selector: _meta.TagSelector,
+ model_def: _meta.ModelDefinition, node_name: str, flow_node: _meta.FlowNode):

  model_params = list(sorted(model_def.parameters.keys()))
  model_inputs = list(sorted(model_def.inputs.keys()))
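Note: reporting the invalid node type through self._error and then returning an empty GraphSection is a collect-then-fail pattern: the builder records the problem and keeps going, so a single validation pass can surface every broken node instead of stopping at the first one. The pattern in miniature, with hypothetical names:

    errors = []

    def _error(message):
        errors.append(message)  # record now, fail later

    for node_type in ["MODEL_NODE", "BAD_NODE", "WORSE_NODE"]:
        if node_type != "MODEL_NODE":
            _error(f"Invalid node type [{node_type}]")
            continue  # allow building to continue for better error reporting

    assert errors == ["Invalid node type [BAD_NODE]", "Invalid node type [WORSE_NODE]"]
    # after the build pass, the runtime can raise one combined validation error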
@@ -992,14 +880,14 @@ class GraphBuilder:
  model_key = _util.object_key(model_selector)
  self._error(_ex.EJobValidation(f"Incompatible model for flow node [{node_name}] (Model: [{model_key}])"))

- def check_model_type(self, job_def: meta.JobDefinition, model_def: meta.ModelDefinition):
+ def check_model_type(self, job_def: _meta.JobDefinition, model_def: _meta.ModelDefinition):

- if job_def.jobType == meta.JobType.IMPORT_DATA:
- allowed_model_types = [meta.ModelType.DATA_IMPORT_MODEL]
- elif job_def.jobType == meta.JobType.EXPORT_DATA:
- allowed_model_types = [meta.ModelType.DATA_EXPORT_MODEL]
+ if job_def.jobType == _meta.JobType.IMPORT_DATA:
+ allowed_model_types = [_meta.ModelType.DATA_IMPORT_MODEL]
+ elif job_def.jobType == _meta.JobType.EXPORT_DATA:
+ allowed_model_types = [_meta.ModelType.DATA_EXPORT_MODEL]
  else:
- allowed_model_types = [meta.ModelType.STANDARD_MODEL]
+ allowed_model_types = [_meta.ModelType.STANDARD_MODEL]

  if model_def.modelType not in allowed_model_types:
  job_type = job_def.jobType.name
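Note: check_model_type enforces a simple contract between job type and model type: IMPORT_DATA jobs accept only DATA_IMPORT_MODEL, EXPORT_DATA jobs only DATA_EXPORT_MODEL, and every other job type expects STANDARD_MODEL. The same rule expressed as a lookup table (illustrative sketch, not the runtime's code):

    ALLOWED_MODEL_TYPES = {
        "IMPORT_DATA": ["DATA_IMPORT_MODEL"],
        "EXPORT_DATA": ["DATA_EXPORT_MODEL"],
    }

    def allowed_for(job_type):
        return ALLOWED_MODEL_TYPES.get(job_type, ["STANDARD_MODEL"])

    assert allowed_for("IMPORT_DATA") == ["DATA_IMPORT_MODEL"]
    assert allowed_for("RUN_FLOW") == ["STANDARD_MODEL"]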
@@ -1008,8 +896,8 @@ class GraphBuilder:

  @staticmethod
  def build_context_push(
- namespace: NodeNamespace, input_mapping: tp.Dict[str, NodeId],
- explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
+ namespace: NodeNamespace, input_mapping: _tp.Dict[str, NodeId],
+ explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
  -> GraphSection:

  """
@@ -1021,7 +909,7 @@ class GraphBuilder:
  for input_name, outer_id
  in input_mapping.items()}

- push_id = NodeId("trac_ctx_push", namespace, Bundle[tp.Any])
+ push_id = NodeId("trac_ctx_push", namespace, Bundle[_tp.Any])
  push_node = ContextPushNode(push_id, namespace, push_mapping, explicit_deps, bundle=push_id.namespace)

  nodes = {push_id: push_node}
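Note: the push mapping re-keys each outer NodeId under the inner (pushed) namespace, so names referenced inside the context resolve locally. A minimal sketch of the re-keying, using tuples in place of real NodeId objects (hypothetical shapes):

    inner_ns = "ctx"  # the pushed namespace
    input_mapping = {"customer_data": ("customer_data", "outer")}  # name -> outer id

    push_mapping = {
        (name, inner_ns): outer_id
        for name, outer_id in input_mapping.items()}

    assert push_mapping == {("customer_data", "ctx"): ("customer_data", "outer")}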
@@ -1038,8 +926,8 @@ class GraphBuilder:

  @staticmethod
  def build_context_pop(
- namespace: NodeNamespace, output_mapping: tp.Dict[str, NodeId],
- explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
+ namespace: NodeNamespace, output_mapping: _tp.Dict[str, NodeId],
+ explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
  -> GraphSection:

  """
@@ -1051,8 +939,14 @@ class GraphBuilder:
  for output_name, outer_id
  in output_mapping.items()}

- pop_id = NodeId("trac_ctx_pop", namespace, Bundle[tp.Any])
- pop_node = ContextPopNode(pop_id, namespace, pop_mapping, explicit_deps, bundle=pop_id.namespace.parent)
+ push_id = NodeId("trac_ctx_push", namespace, Bundle[_tp.Any])
+ explicit_deps = [push_id, *explicit_deps] if explicit_deps else [push_id]
+
+ pop_id = NodeId("trac_ctx_pop", namespace, Bundle[_tp.Any])
+ pop_node = ContextPopNode(
+ pop_id, namespace, pop_mapping,
+ explicit_deps=explicit_deps,
+ bundle=pop_id.namespace.parent)

  nodes = {pop_id: pop_node}

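Note: the reworked pop node now lists its matching push node as an explicit dependency, so a context can never be popped before it has been pushed, even when output_mapping is empty and no data edge links the two nodes. The prepend idiom covers both the with-deps and no-deps cases:

    def with_push_dep(explicit_deps, push_id="trac_ctx_push"):
        return [push_id, *explicit_deps] if explicit_deps else [push_id]

    assert with_push_dep(None) == ["trac_ctx_push"]
    assert with_push_dep(["other_node"]) == ["trac_ctx_push", "other_node"]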
@@ -1066,6 +960,91 @@ class GraphBuilder:
  outputs={*pop_mapping.values()},
  must_run=[pop_id])

+ def build_job_result(
+ self, output_ids: _tp.List[NodeId[JOB_OUTPUT_TYPE]],
+ output_keys: _tp.Optional[_tp.Dict[NodeId, str]] = None,
+ explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
+ -> GraphSection:
+
+ if output_keys:
+ named_outputs = dict((output_keys[oid], oid) for oid in filter(lambda oid: oid in output_keys, output_ids))
+ unnamed_outputs = list(filter(lambda oid: oid not in output_keys, output_ids))
+ else:
+ named_outputs = dict()
+ unnamed_outputs = output_ids
+
+ result_node_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
+ result_node = JobResultNode(
+ result_node_id,
+ self._job_config.jobId,
+ self._job_config.resultId,
+ named_outputs, unnamed_outputs,
+ explicit_deps=explicit_deps)
+
+ result_nodes = {result_node_id: result_node}
+
+ return GraphSection(result_nodes, inputs=set(output_ids), must_run=[result_node_id])
+
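Note: build_job_result partitions the job's output ids into named outputs (those listed in output_keys) and unnamed ones, preserving order for the latter. The same partition run on hypothetical string ids:

    output_ids = ["out_1", "out_2", "out_3"]
    output_keys = {"out_1": "profit", "out_3": "loss"}

    named = dict((output_keys[oid], oid) for oid in output_ids if oid in output_keys)
    unnamed = [oid for oid in output_ids if oid not in output_keys]

    assert named == {"profit": "out_1", "loss": "out_3"}
    assert unnamed == ["out_2"]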
+ def build_dynamic_outputs(self, source_id: NodeId, output_names: _tp.List[str]) -> GraphUpdate:
+
+ nodes = dict()
+ dependencies = dict()
+
+ # All dynamic outputs are DATA with dynamic schemas for now
+ dynamic_schema = _meta.ModelOutputSchema(
+ objectType=_meta.ObjectType.DATA,
+ schema=None, dynamic=True)
+
+ for output_name in output_names:
+
+ # Node to extract dynamic outputs from the source node (a model or flow output bundle)
+ output_id = NodeId.of(output_name, source_id.namespace, _data.DataView)
+ output_node = BundleItemNode(output_id, source_id, output_name)
+ nodes[output_id] = output_node
+
+ # All dynamic outputs are DATA for now
+ self._build_data_output(output_name, dynamic_schema, output_id, prior_selector=None, nodes=nodes,
+ explicit_deps=[source_id])
+
+ named_outputs = dict(
+ (nid.name, nid) for nid, n in nodes.items()
+ if nid.result_type == GraphOutput or isinstance(n, SaveDataNode))
+
+ dynamic_outputs_id = NodeId.of("trac_dynamic_outputs", source_id.namespace, DynamicOutputsNode)
+ dynamic_outputs_node = DynamicOutputsNode(
+ dynamic_outputs_id, named_outputs,
+ explicit_deps=[source_id])
+
+ job_result_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
+
+ nodes[dynamic_outputs_id] = dynamic_outputs_node
+ dependencies[job_result_id] = [Dependency(dynamic_outputs_id, DependencyType.HARD)]
+
+ return GraphUpdate(nodes, dependencies)
+
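Note: dynamic outputs only become known after the producing node has run, which is why this method returns a GraphUpdate rather than a GraphSection: a set of new nodes to add to the live graph, plus extra dependencies spliced onto nodes that already exist (here, the job result is made to wait for the dynamic outputs). A stand-in for the update shape, with hypothetical ids (the real GraphUpdate type is defined elsewhere in the runtime):

    import dataclasses

    @dataclasses.dataclass
    class GraphUpdateSketch:  # stand-in for the runtime's GraphUpdate
        nodes: dict           # new nodes keyed by node id
        dependencies: dict    # existing node id -> extra upstream dependencies

    update = GraphUpdateSketch(
        nodes={"trac_dynamic_outputs": object()},
        dependencies={"trac_job_result": ["trac_dynamic_outputs"]})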
+ def _allocate_id(self, object_type: _meta.ObjectType):
+
+ preallocated_ids = self._preallocated_ids.get(object_type)
+
+ if preallocated_ids:
+ # Preallocated IDs have objectVersion = 0, use a new version to get objectVersion = 1
+ return _util.new_object_version(preallocated_ids.pop())
+ else:
+ return _util.new_object_id(object_type)
+
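Note: _allocate_id consumes ids reserved up front by the platform where possible (preallocated headers carry objectVersion = 0, so bumping the version yields a usable version 1 id) and only mints a brand-new id when the pool for that object type is exhausted. The idiom in isolation, with a hypothetical id representation:

    pool = {"DATA": [{"objectId": "abc", "objectVersion": 0}]}

    def allocate(object_type):
        ids = pool.get(object_type)
        if ids:
            header = ids.pop()  # consume a preallocated id
            return {**header, "objectVersion": header["objectVersion"] + 1}
        return {"objectId": "fresh", "objectVersion": 1}  # mint a new id

    assert allocate("DATA") == {"objectId": "abc", "objectVersion": 1}
    assert allocate("DATA")["objectId"] == "fresh"  # pool empty on the second call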
+ def _attach_metadata(self, obj: _tp.Any, selector: _meta.TagSelector):
+
+ item_id = _util.get_job_mapping(selector, self._job_config)
+ tag = _util.get_job_metadata_tag(selector, self._job_config, optional=True)
+
+ attributes = dict() if tag is None else dict(
+ (attr_name, _type_system.MetadataCodec.decode_value(attr_value))
+ for attr_name, attr_value in tag.attrs.items())
+
+ metadata = _api.RuntimeMetadata(objectId=item_id, attributes=attributes)
+
+ return _util.attach_runtime_metadata(obj, metadata)
+
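Note: _attach_metadata decodes the tag attributes recorded in the job config into native Python values before handing them to models as RuntimeMetadata; when no tag is available, the attributes degrade to an empty dict rather than an error. The None-safe decode step on its own (decode_value here stands in for MetadataCodec.decode_value):

    def decode_value(encoded):
        return encoded  # the real codec maps metadata Values to Python types

    tag = None  # tag metadata may be absent for a given selector

    attributes = dict() if tag is None else dict(
        (name, decode_value(value)) for name, value in tag.attrs.items())

    assert attributes == {}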
  def _join_sections(self, *sections: GraphSection, allow_partial_inputs: bool = False):

  n_sections = len(sections)
@@ -1097,7 +1076,7 @@

  return GraphSection(nodes, inputs, last_section.outputs, must_run)

- def _invalid_graph_error(self, missing_dependencies: tp.Iterable[NodeId]):
+ def _invalid_graph_error(self, missing_dependencies: _tp.Iterable[NodeId]):
  missing_ids = ", ".join(map(self._missing_item_display_name, missing_dependencies))
  message = f"The execution graph has unsatisfied dependencies: [{missing_ids}]"
1081
  missing_ids = ", ".join(map(self._missing_item_display_name, missing_dependencies))
1103
1082
  message = f"The execution graph has unsatisfied dependencies: [{missing_ids}]"