tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b2__py3-none-any.whl
This diff compares the contents of two publicly released package versions, as published to a supported public registry. It is provided for informational purposes only.
- tracdap/rt/_impl/core/config_parser.py +29 -3
- tracdap/rt/_impl/core/data.py +627 -40
- tracdap/rt/_impl/core/repos.py +17 -8
- tracdap/rt/_impl/core/storage.py +25 -13
- tracdap/rt/_impl/core/struct.py +254 -60
- tracdap/rt/_impl/core/util.py +125 -11
- tracdap/rt/_impl/exec/context.py +35 -8
- tracdap/rt/_impl/exec/dev_mode.py +169 -127
- tracdap/rt/_impl/exec/engine.py +203 -140
- tracdap/rt/_impl/exec/functions.py +228 -263
- tracdap/rt/_impl/exec/graph.py +141 -126
- tracdap/rt/_impl/exec/graph_builder.py +428 -449
- tracdap/rt/_impl/grpc/codec.py +8 -13
- tracdap/rt/_impl/grpc/server.py +7 -7
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +37 -35
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +23 -19
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
- tracdap/rt/_impl/runtime.py +3 -9
- tracdap/rt/_impl/static_api.py +5 -6
- tracdap/rt/_plugins/format_csv.py +2 -2
- tracdap/rt/_plugins/repo_git.py +56 -11
- tracdap/rt/_plugins/storage_aws.py +165 -150
- tracdap/rt/_plugins/storage_azure.py +17 -11
- tracdap/rt/_plugins/storage_gcp.py +35 -18
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/model_api.py +45 -0
- tracdap/rt/config/__init__.py +7 -9
- tracdap/rt/config/common.py +3 -14
- tracdap/rt/config/job.py +17 -3
- tracdap/rt/config/platform.py +9 -32
- tracdap/rt/config/result.py +8 -4
- tracdap/rt/config/runtime.py +5 -10
- tracdap/rt/config/tenant.py +28 -0
- tracdap/rt/launch/cli.py +0 -8
- tracdap/rt/launch/launch.py +1 -3
- tracdap/rt/metadata/__init__.py +35 -35
- tracdap/rt/metadata/data.py +19 -31
- tracdap/rt/metadata/job.py +3 -1
- tracdap/rt/metadata/storage.py +9 -0
- tracdap/rt/metadata/type.py +9 -5
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +5 -3
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +64 -63
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
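The headline change in this release is the rewritten job graph builder (diff below): output object IDs are no longer looked up through `job_config.resultMapping`; they are drawn from a pool of `preallocatedIds`, bucketed by object type in the new `GraphBuilder.__init__`. As a rough illustration of that bucketing pattern, here is a minimal, runnable sketch; the tuple-based stand-ins for TRAC tag headers are assumptions for the example, not the real `_meta.TagHeader` type. Note that `itertools.groupby` only merges *adjacent* elements, which is why the new code sorts by `objectType.value` first.

```python
import itertools
from enum import Enum

class ObjectType(Enum):
    DATA = 1
    STORAGE = 2

# Hypothetical stand-ins for TRAC tag headers: (objectType, objectId) pairs
preallocated_ids = [
    (ObjectType.STORAGE, "storage-001"),
    (ObjectType.DATA, "data-001"),
    (ObjectType.DATA, "data-002"),
]

# groupby only merges adjacent items, so sort by object type first,
# mirroring the sorted(...) / groupby(...) combination in GraphBuilder.__init__
buckets = dict(
    (object_type, list(ids)) for object_type, ids in itertools.groupby(
        sorted(preallocated_ids, key=lambda oid: oid[0].value),
        lambda oid: oid[0]))

print(buckets[ObjectType.DATA])     # both DATA headers land in one bucket
print(buckets[ObjectType.STORAGE])  # the single STORAGE header
```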
```diff
--- tracdap/rt/_impl/exec/graph_builder.py (0.8.0rc2)
+++ tracdap/rt/_impl/exec/graph_builder.py (0.9.0b2)
@@ -13,29 +13,41 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import …
+import itertools as _itr
+import typing as _tp

-import tracdap.rt.…
+import tracdap.rt.metadata as _meta
+import tracdap.rt.config as _cfg
 import tracdap.rt.exceptions as _ex
 import tracdap.rt._impl.core.data as _data
+import tracdap.rt._impl.core.type_system as _type_system
 import tracdap.rt._impl.core.util as _util
+import tracdap.rt.api as _api

 from .graph import *


 class GraphBuilder:

-    __JOB_DETAILS = …
+    __JOB_DETAILS = _tp.TypeVar(
         "__JOB_DETAILS",
-        …
-        …
-        …
-        …
-        …
+        _meta.RunModelJob,
+        _meta.RunFlowJob,
+        _meta.ImportModelJob,
+        _meta.ImportDataJob,
+        _meta.ExportDataJob)

-    __JOB_BUILD_FUNC = …
+    __JOB_BUILD_FUNC = _tp.Callable[[_meta.JobDefinition, NodeId], GraphSection]

-    …
+    @classmethod
+    def dynamic(cls, context: GraphContext) -> "GraphBuilder":
+
+        sys_config = context.sys_config
+        job_config = _cfg.JobConfig(context.job_id)
+
+        return GraphBuilder(sys_config, job_config)
+
+    def __init__(self, sys_config: _cfg.RuntimeConfig, job_config: _cfg.JobConfig):

         self._sys_config = sys_config
         self._job_config = job_config
@@ -43,80 +55,97 @@ class GraphBuilder:
         self._job_key = _util.object_key(job_config.jobId)
         self._job_namespace = NodeNamespace(self._job_key)

-        …
+        # Dictionary of object type to preallocated IDs
+        self._preallocated_ids = dict(
+            (k, list(v)) for k, v in _itr.groupby(
+                sorted(job_config.preallocatedIds, key=lambda oid: oid.objectType.value),
+                lambda oid: oid.objectType))
+
+        self._errors = list()
+
+    def unallocated_ids(self) -> _tp.Dict[_meta.ObjectType, _meta.TagHeader]:
+        return self._preallocated_ids

-    def _child_builder(self, job_id: …
+    def _child_builder(self, job_id: _meta.TagHeader) -> "GraphBuilder":

         builder = GraphBuilder(self._sys_config, self._job_config)
         builder._job_key = _util.object_key(job_id)
         builder._job_namespace = NodeNamespace(builder._job_key)

+        # Do not share preallocated IDs with the child graph
+        builder._preallocated_ids = dict()
+
         return builder

-    def build_job(self, job_def: …
+    def build_job(self, job_def: _meta.JobDefinition, ) -> Graph:

         try:

-            if job_def.jobType == …
-                …
+            if job_def.jobType == _meta.JobType.IMPORT_MODEL:
+                graph = self.build_standard_job(job_def, self.build_import_model_job)
+
+            elif job_def.jobType == _meta.JobType.RUN_MODEL:
+                graph = self.build_standard_job(job_def, self.build_run_model_job)

-            …
-            …
+            elif job_def.jobType == _meta.JobType.RUN_FLOW:
+                graph = self.build_standard_job(job_def, self.build_run_flow_job)

-            …
-            …
+            elif job_def.jobType in [_meta.JobType.IMPORT_DATA, _meta.JobType.EXPORT_DATA]:
+                graph = self.build_standard_job(job_def, self.build_import_export_data_job)

-            …
-            …
+            elif job_def.jobType == _meta.JobType.JOB_GROUP:
+                graph = self.build_standard_job(job_def, self.build_job_group)

-            …
-            …
+            else:
+                self._error(_ex.EJobValidation(f"Job type [{job_def.jobType.name}] is not supported yet"))
+                raise self._error_summary()

-            self.…
+            if any(self._errors):
+                raise self._error_summary()
+            else:
+                return graph

         except Exception as e:

             # If there are recorded, errors, assume unhandled exceptions are a result of those
             # Only report the recorded errors, to reduce noise
             if any(self._errors):
-                …
+                raise self._error_summary()

             # If no errors are recorded, an exception here would be a bug
             raise _ex.ETracInternal(f"Unexpected error preparing the job execution graph") from e

-    …
-    …
-    if any(self._errors):
+    def _error_summary(self) -> Exception:

-        …
-        …
-        …
-        …
-        …
+        if len(self._errors) == 1:
+            return self._errors[0]
+        else:
+            err_text = "\n".join(map(str, self._errors))
+            return _ex.EJobValidation("Invalid job configuration\n" + err_text)

-    def build_standard_job(self, job_def: …
+    def build_standard_job(self, job_def: _meta.JobDefinition, build_func: __JOB_BUILD_FUNC):

         # Set up the job context

-        push_id = NodeId("trac_job_push", self._job_namespace, Bundle[…
+        push_id = NodeId("trac_job_push", self._job_namespace, Bundle[_tp.Any])
         push_node = ContextPushNode(push_id, self._job_namespace)
         push_section = GraphSection({push_id: push_node}, must_run=[push_id])

         # Build the execution graphs for the main job and results recording

         main_section = build_func(job_def, push_id)
-        main_result_id = NodeId.of("trac_job_result", self._job_namespace, …
+        main_result_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)

         # Clean up the job context

-        global_result_id = NodeId.of(self._job_key, NodeNamespace.root(), …
+        global_result_id = NodeId.of(self._job_key, NodeNamespace.root(), _cfg.JobResult)

-        pop_id = NodeId("trac_job_pop", self._job_namespace, Bundle[…
+        pop_id = NodeId("trac_job_pop", self._job_namespace, Bundle[_tp.Any])
         pop_mapping = {main_result_id: global_result_id}

         pop_node = ContextPopNode(
             pop_id, self._job_namespace, pop_mapping,
-            explicit_deps=main_section.must_run,
+            explicit_deps=[push_id, *main_section.must_run],
             bundle=NodeNamespace.root())

         global_result_node = BundleItemNode(global_result_id, pop_id, self._job_key)
@@ -129,42 +158,41 @@ class GraphBuilder:

         return Graph(job.nodes, global_result_id)

-    def build_import_model_job(self, job_def: …
+    def build_import_model_job(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:

-        # …
+        # TRAC object ID for the new model
+        model_id = self._allocate_id(_meta.ObjectType.MODEL)

-        # TODO: Import model job should pre-allocate an ID, then model ID comes from job_config.resultMapping
-        new_model_id = _util.new_object_id(meta.ObjectType.MODEL)
-        new_model_key = _util.object_key(new_model_id)
-
-        model_scope = self._job_key
         import_details = job_def.importModel
+        import_scope = self._job_key

-        …
-        …
+        # Graph node ID for the import operation
+        import_id = NodeId.of("trac_import_model", self._job_namespace, GraphOutput)

-        …
+        import_node = ImportModelNode(
+            import_id, model_id,
+            import_details, import_scope,
+            explicit_deps=[job_push_id])

-        …
+        main_section = GraphSection(nodes={import_id: import_node})

-        …
-        …
-            explicit_deps=[job_push_id, *main_section.must_run])
+        # RESULT will have a single (unnamed) output
+        result_section = self.build_job_result([import_id], explicit_deps=[job_push_id, *main_section.must_run])

         return self._join_sections(main_section, result_section)

-    def build_import_export_data_job(self, job_def: …
+    def build_import_export_data_job(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:

         # TODO: These are processed as regular calculation jobs for now
         # That might be ok, but is worth reviewing

-        if job_def.jobType == …
+        if job_def.jobType == _meta.JobType.IMPORT_DATA:
             job_details = job_def.importData
         else:
             job_details = job_def.exportData

         target_selector = job_details.model
-        target_obj = _util.…
+        target_obj = _util.get_job_metadata(target_selector, self._job_config)
         target_def = target_obj.model

         return self.build_calculation_job(
@@ -172,12 +200,12 @@ class GraphBuilder:
             target_selector, target_def,
             job_details)

-    def build_run_model_job(self, job_def: …
+    def build_run_model_job(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:

         job_details = job_def.runModel

         target_selector = job_details.model
-        target_obj = _util.…
+        target_obj = _util.get_job_metadata(target_selector, self._job_config)
         target_def = target_obj.model

         return self.build_calculation_job(
@@ -185,12 +213,12 @@ class GraphBuilder:
             target_selector, target_def,
             job_details)

-    def build_run_flow_job(self, job_def: …
+    def build_run_flow_job(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:

         job_details = job_def.runFlow

         target_selector = job_details.flow
-        target_obj = _util.…
+        target_obj = _util.get_job_metadata(target_selector, self._job_config)
         target_def = target_obj.flow

         return self.build_calculation_job(
@@ -198,21 +226,21 @@ class GraphBuilder:
             target_selector, target_def,
             job_details)

-    def build_job_group(self, job_def: …
+    def build_job_group(self, job_def: _meta.JobDefinition, job_push_id: NodeId) -> GraphSection:

         job_group = job_def.jobGroup

-        if job_group.jobGroupType == …
+        if job_group.jobGroupType == _meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
             return self.build_sequential_job_group(job_group, job_push_id)

-        if job_group.jobGroupType == …
+        if job_group.jobGroupType == _meta.JobGroupType.PARALLEL_JOB_GROUP:
             return self.build_parallel_job_group(job_group, job_push_id)

         else:
             self._error(_ex.EJobValidation(f"Job group type [{job_group.jobGroupType.name}] is not supported yet"))
             return GraphSection(dict(), inputs={job_push_id})

-    def build_sequential_job_group(self, job_group: …
+    def build_sequential_job_group(self, job_group: _meta.JobGroup, job_push_id: NodeId) -> GraphSection:

         nodes = dict()
         prior_id = job_push_id
@@ -225,14 +253,14 @@ class GraphBuilder:
             prior_id = child_node.id

         # No real results from job groups yet (they cannot be executed from the platform)
-        job_result = …
-        result_id = NodeId.of("trac_job_result", self._job_namespace, …
+        job_result = _cfg.JobResult()
+        result_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
         result_node = StaticValueNode(result_id, job_result, explicit_deps=[prior_id])
         nodes[result_id] = result_node

         return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})

-    def build_parallel_job_group(self, job_group: …
+    def build_parallel_job_group(self, job_group: _meta.JobGroup, job_push_id: NodeId) -> GraphSection:

         nodes = dict()
         parallel_ids = [job_push_id]
@@ -245,22 +273,22 @@ class GraphBuilder:
             parallel_ids.append(child_node.id)

         # No real results from job groups yet (they cannot be executed from the platform)
-        job_result = …
-        result_id = NodeId.of("trac_job_result", self._job_namespace, …
+        job_result = _cfg.JobResult()
+        result_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
         result_node = StaticValueNode(result_id, job_result, explicit_deps=parallel_ids)
         nodes[result_id] = result_node

         return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})

-    def build_child_job(self, child_job_def: …
+    def build_child_job(self, child_job_def: _meta.JobDefinition, explicit_deps) -> Node[_cfg.JobResult]:

-        child_job_id = …
+        child_job_id = self._allocate_id(_meta.ObjectType.JOB)

         child_builder = self._child_builder(child_job_id)
         child_graph = child_builder.build_job(child_job_def)

         child_node_name = _util.object_key(child_job_id)
-        child_node_id = NodeId.of(child_node_name, self._job_namespace, …
+        child_node_id = NodeId.of(child_node_name, self._job_namespace, _cfg.JobResult)

         child_node = ChildJobNode(
             child_node_id, child_job_id, child_job_def,
@@ -269,9 +297,9 @@ class GraphBuilder:
         return child_node

     def build_calculation_job(
-            self, job_def: …
-            target_selector: …
-            target_def: …
+            self, job_def: _meta.JobDefinition, job_push_id: NodeId,
+            target_selector: _meta.TagSelector,
+            target_def: _tp.Union[_meta.ModelDefinition, _meta.FlowDefinition],
             job_details: __JOB_DETAILS) \
             -> GraphSection:

@@ -282,11 +310,11 @@ class GraphBuilder:

         required_params = target_def.parameters
         required_inputs = target_def.inputs
-        …
+        expected_outputs = target_def.outputs

         provided_params = job_details.parameters
         provided_inputs = job_details.inputs
-        …
+        prior_outputs = job_details.priorOutputs

         params_section = self.build_job_parameters(
             required_params, provided_params,
@@ -296,36 +324,48 @@ class GraphBuilder:
             required_inputs, provided_inputs,
             explicit_deps=[job_push_id])

+        prior_outputs_section = self.build_job_prior_outputs(
+            expected_outputs, prior_outputs,
+            explicit_deps=[job_push_id])
+
         exec_namespace = self._job_namespace
-        exec_obj = _util.…
+        exec_obj = _util.get_job_metadata(target_selector, self._job_config)

         exec_section = self.build_model_or_flow(
             exec_namespace, job_def, exec_obj,
             explicit_deps=[job_push_id])

         output_section = self.build_job_outputs(
-            …
+            expected_outputs, prior_outputs,
             explicit_deps=[job_push_id])

-        main_section = self._join_sections(…
+        main_section = self._join_sections(
+            params_section, input_section, prior_outputs_section,
+            exec_section, output_section)

         # Build job-level metadata outputs

-        …
+        output_ids = list(
             nid for nid, n in main_section.nodes.items()
-            if isinstance(n, …
+            if nid.result_type == GraphOutput or isinstance(n, SaveDataNode))
+
+        # Map the SAVE nodes to their corresponding named output keys
+        output_keys = dict(
+            (nid, nid.name.replace(":SAVE", ""))
+            for nid, n in output_section.nodes.items()
+            if isinstance(n, SaveDataNode))

-        result_section = self.…
-        …
+        result_section = self.build_job_result(
+            output_ids, output_keys,
             explicit_deps=[job_push_id, *main_section.must_run])

         return self._join_sections(main_section, result_section)

     def build_job_parameters(
             self,
-            required_params: …
-            supplied_params: …
-            explicit_deps: …
+            required_params: _tp.Dict[str, _meta.ModelParameter],
+            supplied_params: _tp.Dict[str, _meta.Value],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         nodes = dict()
@@ -341,7 +381,7 @@ class GraphBuilder:
                 self._error(_ex.EJobValidation(f"Missing required parameter: [{param_name}]"))
                 continue

-            param_id = NodeId(param_name, self._job_namespace, …
+            param_id = NodeId(param_name, self._job_namespace, _meta.Value)
             param_node = StaticValueNode(param_id, param_def, explicit_deps=explicit_deps)

             nodes[param_id] = param_node
@@ -350,402 +390,241 @@ class GraphBuilder:

     def build_job_inputs(
             self,
-            required_inputs: …
-            supplied_inputs: …
-            explicit_deps: …
+            required_inputs: _tp.Dict[str, _meta.ModelInputSchema],
+            supplied_inputs: _tp.Dict[str, _meta.TagSelector],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         nodes = dict()
         outputs = set()

-        for input_name, …
-
-            # Backwards compatibility with pre 0.8 versions
-            input_type = meta.ObjectType.DATA \
-                if input_def.objectType == meta.ObjectType.OBJECT_TYPE_NOT_SET \
-                else input_def.objectType
+        for input_name, input_schema in required_inputs.items():

             input_selector = supplied_inputs.get(input_name)

             if input_selector is None:

-                if …
+                if input_schema.optional:
                     data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
-                    data_view = _data.DataView.create_empty(…
+                    data_view = _data.DataView.create_empty(input_schema.objectType)
                     nodes[data_view_id] = StaticValueNode(data_view_id, data_view, explicit_deps=explicit_deps)
                     outputs.add(data_view_id)
                 else:
                     self._error(_ex.EJobValidation(f"Missing required input: [{input_name}]"))

-                …
-                self._build_data_input(input_name, input_selector, nodes, outputs, explicit_deps)
+                continue

-            …
+            if input_schema.objectType == _meta.ObjectType.DATA:
+                self._build_data_input(input_name, input_selector, nodes, outputs, explicit_deps)
+            elif input_schema.objectType == _meta.ObjectType.FILE:
                 self._build_file_input(input_name, input_selector, nodes, outputs, explicit_deps)
-            …
             else:
-                self._error(_ex.EJobValidation(f"Invalid input type [{…
+                self._error(_ex.EJobValidation(f"Invalid input type [{input_schema.objectType}] for input [{input_name}]"))

         return GraphSection(nodes, outputs=outputs)

-    def …
-        …
-        if data_def.schemaId:
-            schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
-        else:
-            schema_def = data_def.schema
+    def build_job_prior_outputs(
+            self,
+            expected_outputs: _tp.Dict[str, _meta.ModelOutputSchema],
+            prior_outputs: _tp.Dict[str, _meta.TagSelector],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
+            -> GraphSection:

-        …
-        data_spec = _data.DataSpec.create_data_spec(data_item, data_def, storage_def, schema_def)
+        nodes = dict()
+        outputs = set()

-        # Currently one item per input, since inputs are single part/delta
-        data_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
-        nodes[data_load_id] = LoadDataNode(data_load_id, spec=data_spec, explicit_deps=explicit_deps)
+        for output_name, output_schema in expected_outputs.items():

-        data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
-        nodes[data_view_id] = DataViewNode(data_view_id, schema_def, data_load_id)
-        outputs.add(data_view_id)
+            prior_selector = prior_outputs.get(output_name)

-        …
+            # Prior outputs are always optional
+            if prior_selector is None:
+                continue

-        …
-        …
+            if output_schema.objectType == _meta.ObjectType.DATA:
+                prior_spec = self._build_data_spec(prior_selector)
+            elif output_schema.objectType == _meta.ObjectType.FILE:
+                prior_spec = self._build_file_spec(prior_selector)
+            else:
+                self._error(_ex.EJobValidation(f"Invalid output type [{output_schema.objectType}] for output [{output_name}]"))
+                continue

-        …
-        …
-        …
+            prior_output_id = NodeId.of(f"{output_name}:PRIOR", self._job_namespace, _data.DataSpec)
+            nodes[prior_output_id] = StaticValueNode(prior_output_id, prior_spec, explicit_deps=explicit_deps)
+            outputs.add(prior_output_id)

-        file_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
-        nodes[file_view_id] = DataViewNode(file_view_id, None, file_load_id)
-        outputs.add(file_view_id)
+        return GraphSection(nodes, outputs=outputs)

     def build_job_outputs(
             self,
-            required_outputs: …
-            …
-            explicit_deps: …
+            required_outputs: _tp.Dict[str, _meta.ModelOutputSchema],
+            prior_outputs: _tp.Dict[str, _meta.TagSelector],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         nodes = {}
-        …
+        section_inputs = set()

-        for output_name, …
+        for output_name, output_schema in required_outputs.items():

             # Output data view must already exist in the namespace, it is an input to the save operation
             data_view_id = NodeId.of(output_name, self._job_namespace, _data.DataView)
-            …
+            section_inputs.add(data_view_id)

-            # …
-            …
-                if output_def.objectType == meta.ObjectType.OBJECT_TYPE_NOT_SET \
-                else output_def.objectType
+            # Check for prior outputs
+            prior_selector = prior_outputs.get(output_name)

-            …
+            if output_schema.objectType == _meta.ObjectType.DATA:
+                self._build_data_output(output_name, output_schema, data_view_id, prior_selector, nodes, explicit_deps)
+            elif output_schema.objectType == _meta.ObjectType.FILE:
+                self._build_file_output(output_name, output_schema, data_view_id, prior_selector, nodes, explicit_deps)
+            else:
+                self._error(_ex.EJobValidation(f"Invalid output type [{output_schema.objectType}] for input [{output_name}]"))

-            …
-                if output_def.optional:
-                    optional_info = "(configuration is required for all optional outputs, in case they are produced)"
-                    self._error(_ex.EJobValidation(f"Missing optional output: [{output_name}] {optional_info}"))
-                    continue
-                else:
-                    self._error(_ex.EJobValidation(f"Missing required output: [{output_name}]"))
-                    continue
+        return GraphSection(nodes, inputs=section_inputs)

-            …
-            self._build_data_output(output_name, output_selector, data_view_id, nodes, explicit_deps)
+    def _build_data_input(self, input_name, input_selector, nodes, outputs, explicit_deps):

-        …
-        …
+        data_spec = self._build_data_spec(input_selector)
+        data_spec = self._attach_metadata(data_spec, input_selector)

-        …
-        …
+        # Physical load of data items from disk
+        # Currently one item per input, since inputs are single part/delta
+        data_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
+        nodes[data_load_id] = LoadDataNode(data_load_id, spec=data_spec, explicit_deps=explicit_deps)

-        …
+        # Input views assembled by mapping one root part to each view
+        data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
+        nodes[data_view_id] = DataViewNode(data_view_id, data_spec.schema, data_load_id)
+        outputs.add(data_view_id)

-    def _build_data_output(self, output_name, …
+    def _build_data_output(self, output_name, output_schema, data_view_id, prior_selector, nodes, explicit_deps):

         # Map one data item from each view, since outputs are single part/delta
         data_item_id = NodeId(f"{output_name}:ITEM", self._job_namespace, _data.DataItem)
         nodes[data_item_id] = DataItemNode(data_item_id, data_view_id)

-        …
-        data_def = data_obj.data
-        storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
-
-        if data_def.schemaId:
-            schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
-        else:
-            schema_def = data_def.schema
-
-        root_part_opaque_key = 'part-root' # TODO: Central part names / constants
-        data_item = data_def.parts[root_part_opaque_key].snap.deltas[0].dataItem
-        data_spec = _data.DataSpec.create_data_spec(data_item, data_def, storage_def, schema_def)
-
-        # Create a physical save operation for the data item
-        data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, _data.DataSpec)
-        nodes[data_save_id] = SaveDataNode(data_save_id, data_item_id, spec=data_spec)
-
-        output_key = output_name
-        storage_key = output_name + ":STORAGE"
+        if prior_selector is None:
+            # New output - Allocate new TRAC object IDs
+            prior_spec = None
+            data_id = self._allocate_id(_meta.ObjectType.DATA)
+            storage_id = self._allocate_id(_meta.ObjectType.STORAGE)
         else:
+            # New version - Get the prior version metadata and bump the object IDs
+            prior_spec = self._build_data_spec(prior_selector)
+            data_id = _util.new_object_version(prior_spec.primary_id)
+            storage_id = _util.new_object_version(prior_spec.storage_id)

-        …
-        mapped_storage_key = output_name + ":STORAGE"
+        # Graph node ID for the save operation
+        data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, _data.DataSpec)

-        …
-        …
-        …
-        data_spec_id = …
-        nodes[data_spec_id] = DynamicDataSpecNode(
+        if output_schema.dynamic:
+
+            # For dynamic outputs, an extra graph node is needed to assemble the schema information
+            # This will call build_data_spec() at runtime, once the schema is known
+            data_spec_id = NodeId.of(f"{output_name}:DYNAMIC_SCHEMA", self._job_namespace, _data.DataSpec)
+            nodes[data_spec_id] = DataSpecNode(
                 data_spec_id, data_view_id,
-                data_id, storage_id,
-                …
+                data_id, storage_id, output_name,
+                self._sys_config,
+                prior_data_spec=prior_spec,
                 explicit_deps=explicit_deps)

-            # …
-            data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, _data.DataSpec)
+            # Save operation uses the dynamically produced schema info
             nodes[data_save_id] = SaveDataNode(data_save_id, data_item_id, spec_id=data_spec_id)

-        …
-        storage_key = _util.object_key(storage_id)
-
-        data_result_id = NodeId.of(f"{output_name}:RESULT", self._job_namespace, ObjectBundle)
-        nodes[data_result_id] = DataResultNode(
-            data_result_id, output_name, data_save_id,
-            data_key=output_key,
-            storage_key=storage_key)
-
-    def _build_file_output(self, output_name, output_def, output_selector, file_view_id, nodes, explicit_deps):
+        else:

-        …
-        …
+            # If the output is not dynamic, a data spec can be built ahead of time
+            data_spec = _data.build_data_spec(
+                data_id, storage_id, output_name,
+                output_schema.schema,
+                self._sys_config,
+                prior_spec=prior_spec)

-        …
+            # Save operation uses the statically produced schema info
+            nodes[data_save_id] = SaveDataNode(data_save_id, data_item_id, spec=data_spec)

-        …
+    def _build_data_spec(self, data_selector):

-        …
+        # Build a data spec using metadata from the job config
+        # For now we are always loading the root part, snap 0, delta 0
+        data_def = _util.get_job_metadata(data_selector, self._job_config).data
+        storage_def = _util.get_job_metadata(data_def.storageId, self._job_config).storage

-        …
-        …
+        if data_def.schemaId:
+            schema_def = _util.get_job_metadata(data_def.schemaId, self._job_config).schema
+        else:
+            schema_def = data_def.schema

-        …
-        …
+        root_part_opaque_key = 'part-root' # TODO: Central part names / constants
+        data_item = data_def.parts[root_part_opaque_key].snap.deltas[0].dataItem

-        …
+        data_id = _util.get_job_mapping(data_selector, self._job_config)
+        storage_id = _util.get_job_mapping(data_def.storageId, self._job_config)

-        …
+        return _data.DataSpec \
+            .create_data_spec(data_item, data_def, storage_def, schema_def) \
+            .with_ids(data_id, storage_id)

-        …
-        storage_id = self._job_config.resultMapping[mapped_storage_key]
+    def _build_file_input(self, input_name, input_selector, nodes, outputs, explicit_deps):

-        …
-        …
-        data_item = f"file/{output_id.objectId}/version-{output_id.objectVersion}"
-        storage_key = self._sys_config.storage.defaultBucket
-        storage_path = f"file/FILE-{output_id.objectId}/version-{output_id.objectVersion}/{output_name}.{file_type.extension}"
+        file_spec = self._build_file_spec(input_selector)
+        file_spec = self._attach_metadata(file_spec, input_selector)

-        …
-        …
+        file_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
+        nodes[file_load_id] = LoadDataNode(file_load_id, spec=file_spec, explicit_deps=explicit_deps)

-        …
-        …
+        # Input views assembled by mapping one root part to each view
+        file_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
+        nodes[file_view_id] = DataViewNode(file_view_id, None, file_load_id)
+        outputs.add(file_view_id)

-        …
+    def _build_file_output(self, output_name, output_schema, file_view_id, prior_selector, nodes, explicit_deps):

+        # Map file item from view
         file_item_id = NodeId(f"{output_name}:ITEM", self._job_namespace, _data.DataItem)
         nodes[file_item_id] = DataItemNode(file_item_id, file_view_id, explicit_deps=explicit_deps)

-        …
+        if prior_selector is None:
+            # New output - Allocate new TRAC object IDs
+            prior_spec = None
+            file_id = self._allocate_id(_meta.ObjectType.FILE)
+            storage_id = self._allocate_id(_meta.ObjectType.STORAGE)
+        else:
+            # New version - Get the prior version metadata and bump the object IDs
+            prior_spec = self._build_file_spec(prior_selector) if prior_selector else None
+            file_id = _util.new_object_version(prior_spec.primary_id)
+            storage_id = _util.new_object_version(prior_spec.storage_id)
+
+        # File spec can always be built ahead of time (no equivalent of dynamic schemas)
+        file_spec = _data.build_file_spec(
+            file_id, storage_id,
+            output_name, output_schema.fileType,
+            self._sys_config,
+            prior_spec=prior_spec)
+
+        # Graph node for the save operation
         file_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, _data.DataSpec)
         nodes[file_save_id] = SaveDataNode(file_save_id, file_item_id, spec=file_spec)

-        …
-        nodes[data_result_id] = DataResultNode(
-            data_result_id, output_name, file_save_id,
-            file_key=resolved_output_key,
-            storage_key=resolved_storage_key)
-
-    @classmethod
-    def build_output_file_and_storage(cls, output_key, file_type: meta.FileType, sys_config: cfg.RuntimeConfig, job_config: cfg.JobConfig):
-
-        # TODO: Review and de-dupe building of output metadata
-        # Responsibility for assigning outputs could perhaps move from orchestrator to runtime
-
-        output_storage_key = f"{output_key}:STORAGE"
-
-        output_id = job_config.resultMapping[output_key]
-        output_storage_id = job_config.resultMapping[output_storage_key]
-
-        timestamp = _dt.datetime.fromisoformat(output_id.objectTimestamp.isoDatetime)
-        data_item = f"file/{output_id.objectId}/version-{output_id.objectVersion}"
-        storage_key = sys_config.storage.defaultBucket
-        storage_path = f"file/FILE-{output_id.objectId}/version-{output_id.objectVersion}/{output_key}.{file_type.extension}"
-
-        file_def = cls.build_file_def(output_key, file_type, output_storage_id, data_item)
-        storage_def = cls.build_storage_def(data_item, storage_key, storage_path, file_type.mimeType, timestamp)
-
-        return file_def, storage_def
-
-    @classmethod
-    def build_runtime_outputs(cls, output_names: tp.List[str], job_namespace: NodeNamespace):
-
-        # This method is called dynamically during job execution
-        # So it cannot use stateful information like self._job_config or self._job_namespace
-
-        # TODO: Factor out common logic with regular job outputs (including static / dynamic)
-
-        nodes = {}
-        inputs = set()
-        outputs = list()
-
-        for output_name in output_names:
-
-            # Output data view must already exist in the namespace
-            data_view_id = NodeId.of(output_name, job_namespace, _data.DataView)
-            data_spec_id = NodeId.of(f"{output_name}:SPEC", job_namespace, _data.DataSpec)
-
-            mapped_output_key = output_name
-            mapped_storage_key = output_name + ":STORAGE"
-
-            data_id = _util.new_object_id(meta.ObjectType.DATA)
-            storage_id = _util.new_object_id(meta.ObjectType.STORAGE)
-
-            data_spec_node = DynamicDataSpecNode(
-                data_spec_id, data_view_id,
-                data_id, storage_id,
-                prior_data_spec=None)
-
-            output_key = _util.object_key(data_id)
-            storage_key = _util.object_key(storage_id)
-
-            # Map one data item from each view, since outputs are single part/delta
-            data_item_id = NodeId(f"{output_name}:ITEM", job_namespace, _data.DataItem)
-            data_item_node = DataItemNode(data_item_id, data_view_id)
-
-            # Create a physical save operation for the data item
-            data_save_id = NodeId.of(f"{output_name}:SAVE", job_namespace, _data.DataSpec)
-            data_save_node = SaveDataNode(data_save_id, data_item_id, spec_id=data_spec_id)
-
-            data_result_id = NodeId.of(f"{output_name}:RESULT", job_namespace, ObjectBundle)
-            data_result_node = DataResultNode(
-                data_result_id, output_name, data_save_id,
-                output_key, storage_key)
-
-            nodes[data_spec_id] = data_spec_node
-            nodes[data_item_id] = data_item_node
-            nodes[data_save_id] = data_save_node
-            nodes[data_result_id] = data_result_node
-
-            # Job-level data view is an input to the save operation
-            inputs.add(data_view_id)
-            outputs.append(data_result_id)
-
-        runtime_outputs = JobOutputs(bundles=outputs)
-        runtime_outputs_id = NodeId.of("trac_runtime_outputs", job_namespace, JobOutputs)
-        runtime_outputs_node = RuntimeOutputsNode(runtime_outputs_id, runtime_outputs)
-
-        nodes[runtime_outputs_id] = runtime_outputs_node
-
-        return GraphSection(nodes, inputs=inputs, outputs={runtime_outputs_id})
-
-    @classmethod
-    def build_file_def(cls, file_name, file_type, storage_id, data_item):
-
-        file_def = meta.FileDefinition()
-        file_def.name = f"{file_name}.{file_type.extension}"
-        file_def.extension = file_type.extension
-        file_def.mimeType = file_type.mimeType
-        file_def.storageId = _util.selector_for_latest(storage_id)
-        file_def.dataItem = data_item
-        file_def.size = 0
-
-        return file_def
-
-    @classmethod
-    def build_storage_def(
-            cls, data_item: str,
-            storage_key, storage_path, storage_format,
-            timestamp: _dt.datetime):
-
-        first_incarnation = 0
-
-        storage_copy = meta.StorageCopy(
-            storage_key, storage_path, storage_format,
-            copyStatus=meta.CopyStatus.COPY_AVAILABLE,
-            copyTimestamp=meta.DatetimeValue(timestamp.isoformat()))
-
-        storage_incarnation = meta.StorageIncarnation(
-            [storage_copy],
-            incarnationIndex=first_incarnation,
-            incarnationTimestamp=meta.DatetimeValue(timestamp.isoformat()),
-            incarnationStatus=meta.IncarnationStatus.INCARNATION_AVAILABLE)
-
-        storage_item = meta.StorageItem([storage_incarnation])
-
-        storage_def = meta.StorageDefinition()
-        storage_def.dataItems[data_item] = storage_item
-
-        return storage_def
+    def _build_file_spec(self, file_selector):

-        …
-        …
-        objects: tp.Dict[str, NodeId[meta.ObjectDefinition]] = None,
-        bundles: tp.List[NodeId[ObjectBundle]] = None,
-        explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
-        -> GraphSection:
-
-        result_id = self._job_config.resultMapping.get("trac_job_result")
-        result_node_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
-
-        if objects is not None:
-
-            results_inputs = set(objects.values())
-
-            build_result_node = BuildJobResultNode(
-                result_node_id, result_id, self._job_config.jobId,
-                outputs=JobOutputs(objects=objects),
-                explicit_deps=explicit_deps)
-
-        elif bundles is not None:
-
-            results_inputs = set(bundles)
-
-            build_result_node = BuildJobResultNode(
-                result_node_id, result_id, self._job_config.jobId,
-                outputs=JobOutputs(bundles=bundles),
-                explicit_deps=explicit_deps)
-
-        else:
-            raise _ex.EUnexpected()
+        file_def = _util.get_job_metadata(file_selector, self._job_config).file
+        storage_def = _util.get_job_metadata(file_def.storageId, self._job_config).storage

-        …
+        file_id = _util.get_job_mapping(file_selector, self._job_config)
+        storage_id = _util.get_job_mapping(file_def.storageId, self._job_config)

-        return …
+        return _data.DataSpec \
+            .create_file_spec(file_def.dataItem, file_def, storage_def) \
+            .with_ids(file_id, storage_id)

     def build_model_or_flow_with_context(
             self, namespace: NodeNamespace, model_or_flow_name: str,
-            job_def: …
-            input_mapping: …
-            explicit_deps: …
+            job_def: _meta.JobDefinition, model_or_flow: _meta.ObjectDefinition,
+            input_mapping: _tp.Dict[str, NodeId], output_mapping: _tp.Dict[str, NodeId],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         # Generate a name for a new unique sub-context
@@ -772,32 +651,35 @@ class GraphBuilder:

     def build_model_or_flow(
             self, namespace: NodeNamespace,
-            job_def: …
-            model_or_flow: …
-            explicit_deps: …
+            job_def: _meta.JobDefinition,
+            model_or_flow: _meta.ObjectDefinition,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

-        if model_or_flow.objectType == …
+        if model_or_flow.objectType == _meta.ObjectType.MODEL:
             return self.build_model(namespace, job_def, model_or_flow.model, explicit_deps)

-        elif model_or_flow.objectType == …
+        elif model_or_flow.objectType == _meta.ObjectType.FLOW:
             return self.build_flow(namespace, job_def, model_or_flow.flow)

         else:
             message = f"Invalid job config, expected model or flow, got [{model_or_flow.objectType}]"
             self._error(_ex.EJobValidation(message))

+            # Allow building to continue for better error reporting
+            return GraphSection(dict())
+
     def build_model(
             self, namespace: NodeNamespace,
-            job_def: …
-            model_def: …
-            explicit_deps: …
+            job_def: _meta.JobDefinition,
+            model_def: _meta.ModelDefinition,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         self.check_model_type(job_def, model_def)

         def param_id(node_name):
-            return NodeId(node_name, namespace, …
+            return NodeId(node_name, namespace, _meta.Value)

         def data_id(node_name):
             return NodeId(node_name, namespace, _data.DataView)
@@ -808,9 +690,9 @@ class GraphBuilder:
         output_ids = set(map(data_id, model_def.outputs))

         # Set up storage access for import / export data jobs
-        if job_def.jobType == …
+        if job_def.jobType == _meta.JobType.IMPORT_DATA:
             storage_access = job_def.importData.storageAccess
-        elif job_def.jobType == …
+        elif job_def.jobType == _meta.JobType.EXPORT_DATA:
             storage_access = job_def.exportData.storageAccess
         else:
             storage_access = None
@@ -827,16 +709,19 @@ class GraphBuilder:
         model_name = model_def.entryPoint.split(".")[-1] # TODO: Check unique model name
         model_id = NodeId(model_name, namespace, Bundle[_data.DataView])

+        # Used to set up a dynamic builder at runtime if dynamic graph updates are needed
+        context = GraphContext(
+            self._job_config.jobId,
+            self._job_namespace, namespace,
+            self._sys_config)
+
         model_node = RunModelNode(
-            model_id, …
+            model_id, model_def, model_scope,
             frozenset(parameter_ids), frozenset(input_ids),
             explicit_deps=explicit_deps, bundle=model_id.namespace,
-            storage_access=storage_access)
+            storage_access=storage_access, graph_context=context)

-        …
-        model_result_node = RunModelResultNode(model_result_id, model_id)
-
-        nodes = {model_id: model_node, model_result_id: model_result_node}
+        nodes = {model_id: model_node}

         # Create nodes for each model output
         # The model node itself outputs a bundle (dictionary of named outputs)
@@ -849,13 +734,13 @@ class GraphBuilder:
             nodes[output_id] = BundleItemNode(output_id, model_id, output_id.name)

         # Assemble a graph to include the model and its outputs
-        return GraphSection(nodes, inputs={*parameter_ids, *input_ids}, outputs=output_ids, must_run=[…
+        return GraphSection(nodes, inputs={*parameter_ids, *input_ids}, outputs=output_ids, must_run=[model_id])

     def build_flow(
             self, namespace: NodeNamespace,
-            job_def: …
-            flow_def: …
-            explicit_deps: …
+            job_def: _meta.JobDefinition,
+            flow_def: _meta.FlowDefinition,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         def socket_key(socket):
@@ -875,7 +760,7 @@ class GraphBuilder:
         target_edges = {socket_key(edge.target): edge for edge in flow_def.edges}

         # Initially parameters and inputs are reachable, everything else is not
-        def is_input(n): return n[1].nodeType in […
+        def is_input(n): return n[1].nodeType in [_meta.FlowNodeType.PARAMETER_NODE, _meta.FlowNodeType.INPUT_NODE]
         reachable_nodes = dict(filter(is_input, flow_def.nodes.items()))
         remaining_nodes = dict(filter(lambda n: not is_input(n), flow_def.nodes.items()))

@@ -892,7 +777,7 @@ class GraphBuilder:

             graph_section = self._join_sections(graph_section, sub_section, allow_partial_inputs=True)

-            if node.nodeType != …
+            if node.nodeType != _meta.FlowNodeType.OUTPUT_NODE:

                 source_edges = remaining_edges_by_source.pop(node_name)

@@ -916,10 +801,10 @@ class GraphBuilder:

     def build_flow_node(
             self, namespace: NodeNamespace,
-            job_def: …
-            target_edges: …
-            node_name: str, node: …
-            explicit_deps: …
+            job_def: _meta.JobDefinition,
+            target_edges: _tp.Dict[_meta.FlowSocket, _meta.FlowEdge],
+            node_name: str, node: _meta.FlowNode,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         def socket_key(socket):
@@ -930,27 +815,27 @@ class GraphBuilder:
             return NodeId(socket_name, namespace, result_type)

         def edge_mapping(node_: str, socket_: str = None, result_type=None):
-            socket = socket_key(…
+            socket = socket_key(_meta.FlowSocket(node_, socket_))
             edge = target_edges.get(socket)
             # Report missing edges as a job consistency error (this might happen sometimes in dev mode)
             if edge is None:
                 self._error(_ex.EJobValidation(f"Inconsistent flow: Socket [{socket}] is not connected"))
             return socket_id(edge.source.node, edge.source.socket, result_type)

-        if node.nodeType == …
-            return GraphSection({}, inputs={NodeId(node_name, namespace, result_type=…
+        if node.nodeType == _meta.FlowNodeType.PARAMETER_NODE:
+            return GraphSection({}, inputs={NodeId(node_name, namespace, result_type=_meta.Value)})

-        if node.nodeType == …
+        if node.nodeType == _meta.FlowNodeType.INPUT_NODE:
             return GraphSection({}, inputs={NodeId(node_name, namespace, result_type=_data.DataView)})

-        if node.nodeType == …
+        if node.nodeType == _meta.FlowNodeType.OUTPUT_NODE:
             target_id = NodeId(node_name, namespace, result_type=_data.DataView)
             source_id = edge_mapping(node_name, None, _data.DataView)
             return GraphSection({target_id: IdentityNode(target_id, source_id)}, outputs={target_id})

-        if node.nodeType == …
+        if node.nodeType == _meta.FlowNodeType.MODEL_NODE:

-            param_mapping = {socket: edge_mapping(node_name, socket, …
+            param_mapping = {socket: edge_mapping(node_name, socket, _meta.Value) for socket in node.parameters}
             input_mapping = {socket: edge_mapping(node_name, socket, _data.DataView) for socket in node.inputs}
             output_mapping = {socket: socket_id(node_name, socket, _data.DataView) for socket in node.outputs}

@@ -958,10 +843,10 @@ class GraphBuilder:
             pop_mapping = output_mapping

             model_selector = job_def.runFlow.models.get(node_name)
-            model_obj = _util.…
+            model_obj = _util.get_job_metadata(model_selector, self._job_config)

             # Missing models in the job config is a job consistency error
-            if model_obj is None or model_obj.objectType != …
+            if model_obj is None or model_obj.objectType != _meta.ObjectType.MODEL:
                 self._error(_ex.EJobValidation(f"No model was provided for flow node [{node_name}]"))

             # Explicit check for model compatibility - report an error now, do not try build_model()
@@ -976,9 +861,12 @@ class GraphBuilder:

         self._error(_ex.EJobValidation(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]"))

+        # Allow building to continue for better error reporting
+        return GraphSection(dict())
+
     def check_model_compatibility(
-            self, model_selector: …
-            model_def: …
+            self, model_selector: _meta.TagSelector,
+            model_def: _meta.ModelDefinition, node_name: str, flow_node: _meta.FlowNode):

         model_params = list(sorted(model_def.parameters.keys()))
         model_inputs = list(sorted(model_def.inputs.keys()))
@@ -992,14 +880,14 @@ class GraphBuilder:
             model_key = _util.object_key(model_selector)
             self._error(_ex.EJobValidation(f"Incompatible model for flow node [{node_name}] (Model: [{model_key}])"))

-    def check_model_type(self, job_def: …
+    def check_model_type(self, job_def: _meta.JobDefinition, model_def: _meta.ModelDefinition):

-        if job_def.jobType == …
-            allowed_model_types = […
-        elif job_def.jobType == …
-            allowed_model_types = […
+        if job_def.jobType == _meta.JobType.IMPORT_DATA:
+            allowed_model_types = [_meta.ModelType.DATA_IMPORT_MODEL]
+        elif job_def.jobType == _meta.JobType.EXPORT_DATA:
+            allowed_model_types = [_meta.ModelType.DATA_EXPORT_MODEL]
         else:
-            allowed_model_types = […
+            allowed_model_types = [_meta.ModelType.STANDARD_MODEL]

         if model_def.modelType not in allowed_model_types:
             job_type = job_def.jobType.name
@@ -1008,8 +896,8 @@ class GraphBuilder:

     @staticmethod
     def build_context_push(
-            namespace: NodeNamespace, input_mapping: …
-            explicit_deps: …
+            namespace: NodeNamespace, input_mapping: _tp.Dict[str, NodeId],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         """
@@ -1021,7 +909,7 @@ class GraphBuilder:
             for input_name, outer_id
             in input_mapping.items()}

-        push_id = NodeId("trac_ctx_push", namespace, Bundle[…
+        push_id = NodeId("trac_ctx_push", namespace, Bundle[_tp.Any])
         push_node = ContextPushNode(push_id, namespace, push_mapping, explicit_deps, bundle=push_id.namespace)

         nodes = {push_id: push_node}
@@ -1038,8 +926,8 @@ class GraphBuilder:

     @staticmethod
     def build_context_pop(
-            namespace: NodeNamespace, output_mapping: …
-            explicit_deps: …
+            namespace: NodeNamespace, output_mapping: _tp.Dict[str, NodeId],
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
             -> GraphSection:

         """
@@ -1051,8 +939,14 @@ class GraphBuilder:
             for output_name, outer_id
             in output_mapping.items()}

-        …
-        …
+        push_id = NodeId("trac_ctx_push", namespace, Bundle[_tp.Any])
+        explicit_deps = [push_id, *explicit_deps] if explicit_deps else [push_id]
+
+        pop_id = NodeId("trac_ctx_pop", namespace, Bundle[_tp.Any])
+        pop_node = ContextPopNode(
+            pop_id, namespace, pop_mapping,
+            explicit_deps=explicit_deps,
+            bundle=pop_id.namespace.parent)

         nodes = {pop_id: pop_node}

@@ -1066,6 +960,91 @@ class GraphBuilder:
             outputs={*pop_mapping.values()},
             must_run=[pop_id])

+    def build_job_result(
+            self, output_ids: _tp.List[NodeId[JOB_OUTPUT_TYPE]],
+            output_keys: _tp.Optional[_tp.Dict[NodeId, str]] = None,
+            explicit_deps: _tp.Optional[_tp.List[NodeId]] = None) \
+            -> GraphSection:
+
+        if output_keys:
+            named_outputs = dict((output_keys[oid], oid) for oid in filter(lambda oid: oid in output_keys, output_ids))
+            unnamed_outputs = list(filter(lambda oid: oid not in output_keys, output_ids))
+        else:
+            named_outputs = dict()
+            unnamed_outputs = output_ids
+
+        result_node_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
+        result_node = JobResultNode(
+            result_node_id,
+            self._job_config.jobId,
+            self._job_config.resultId,
+            named_outputs, unnamed_outputs,
+            explicit_deps=explicit_deps)
+
+        result_nodes = {result_node_id: result_node}
+
+        return GraphSection(result_nodes, inputs=set(output_ids), must_run=[result_node_id])
+
+    def build_dynamic_outputs(self, source_id: NodeId, output_names: _tp.List[str]) -> GraphUpdate:
+
+        nodes = dict()
+        dependencies = dict()
+
+        # All dynamic outputs are DATA with dynamic schemas for now
+        dynamic_schema = _meta.ModelOutputSchema(
+            objectType=_meta.ObjectType.DATA,
+            schema=None, dynamic=True)
+
+        for output_name in output_names:
+
+            # Node to extract dynamic outputs from the source node (a model or flow output bundle)
+            output_id = NodeId.of(output_name, source_id.namespace, _data.DataView)
+            output_node = BundleItemNode(output_id, source_id, output_name)
+            nodes[output_id] = output_node
+
+            # All dynamic outputs are DATA for now
+            self._build_data_output(output_name, dynamic_schema, output_id, prior_selector=None, nodes=nodes,
+                explicit_deps=[source_id])
+
+        named_outputs = dict(
+            (nid.name, nid) for nid, n in nodes.items()
+            if nid.result_type == GraphOutput or isinstance(n, SaveDataNode))
+
+        dynamic_outputs_id = NodeId.of("trac_dynamic_outputs", source_id.namespace, DynamicOutputsNode)
+        dynamic_outputs_node = DynamicOutputsNode(
+            dynamic_outputs_id, named_outputs,
+            explicit_deps=[source_id])
+
+        job_result_id = NodeId.of("trac_job_result", self._job_namespace, _cfg.JobResult)
+
+        nodes[dynamic_outputs_id] = dynamic_outputs_node
+        dependencies[job_result_id] = [Dependency(dynamic_outputs_id, DependencyType.HARD)]
+
+        return GraphUpdate(nodes, dependencies)
+
+    def _allocate_id(self, object_type: _meta.ObjectType):
+
+        preallocated_ids = self._preallocated_ids.get(object_type)
+
+        if preallocated_ids:
+            # Preallocated IDs have objectVersion = 0, use a new version to get objectVersion = 1
+            return _util.new_object_version(preallocated_ids.pop())
+        else:
+            return _util.new_object_id(object_type)
+
+    def _attach_metadata(self, obj: _tp.Any, selector: _meta.TagSelector):
+
+        item_id = _util.get_job_mapping(selector, self._job_config)
+        tag = _util.get_job_metadata_tag(selector, self._job_config, optional=True)
+
+        attributes = dict() if tag is None else dict(
+            (attr_name, _type_system.MetadataCodec.decode_value(attr_value))
+            for attr_name, attr_value in tag.attrs.items())
+
+        metadata = _api.RuntimeMetadata(objectId=item_id, attributes=attributes)
+
+        return _util.attach_runtime_metadata(obj, metadata)
+
     def _join_sections(self, *sections: GraphSection, allow_partial_inputs: bool = False):

         n_sections = len(sections)
@@ -1097,7 +1076,7 @@ class GraphBuilder:

         return GraphSection(nodes, inputs, last_section.outputs, must_run)

-    def _invalid_graph_error(self, missing_dependencies: …
+    def _invalid_graph_error(self, missing_dependencies: _tp.Iterable[NodeId]):

         missing_ids = ", ".join(map(self._missing_item_display_name, missing_dependencies))
         message = f"The execution graph has unsatisfied dependencies: [{missing_ids}]"
```