PyPI - tracdap-runtime - Versions diffs - 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl - Mend

tracdap-runtime 0.6.4py3-none-any.whl → 0.6.6py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

tracdap/rt/_exec/context.py +556 -36
tracdap/rt/_exec/dev_mode.py +320 -198
tracdap/rt/_exec/engine.py +331 -62
tracdap/rt/_exec/functions.py +151 -22
tracdap/rt/_exec/graph.py +47 -13
tracdap/rt/_exec/graph_builder.py +383 -175
tracdap/rt/_exec/runtime.py +7 -5
tracdap/rt/_impl/config_parser.py +11 -4
tracdap/rt/_impl/data.py +329 -152
tracdap/rt/_impl/ext/__init__.py +13 -0
tracdap/rt/_impl/ext/sql.py +116 -0
tracdap/rt/_impl/ext/storage.py +57 -0
tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +82 -30
tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +155 -2
tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +12 -10
tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -2
tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +29 -0
tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +16 -0
tracdap/rt/_impl/models.py +8 -0
tracdap/rt/_impl/static_api.py +29 -0
tracdap/rt/_impl/storage.py +39 -27
tracdap/rt/_impl/util.py +10 -0
tracdap/rt/_impl/validation.py +140 -18
tracdap/rt/_plugins/repo_git.py +1 -1
tracdap/rt/_plugins/storage_sql.py +417 -0
tracdap/rt/_plugins/storage_sql_dialects.py +117 -0
tracdap/rt/_version.py +1 -1
tracdap/rt/api/experimental.py +267 -0
tracdap/rt/api/hook.py +14 -0
tracdap/rt/api/model_api.py +48 -6
tracdap/rt/config/__init__.py +2 -2
tracdap/rt/config/common.py +6 -0
tracdap/rt/metadata/__init__.py +29 -20
tracdap/rt/metadata/job.py +99 -0
tracdap/rt/metadata/model.py +18 -0
tracdap/rt/metadata/resource.py +24 -0
{tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/METADATA +5 -1
{tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/RECORD +41 -32
{tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/WHEEL +1 -1
{tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/LICENSE +0 -0
{tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/top_level.txt +0 -0

tracdap/rt/_exec/graph_builder.py CHANGED Viewed

@@ -12,8 +12,6 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-from __future__ import annotations
 import tracdap.rt.config as config
 import tracdap.rt.exceptions as _ex
 import tracdap.rt._impl.data as _data  # noqa
@@ -24,72 +22,111 @@ from .graph import *
 class GraphBuilder:
-    __JOB_BUILD_FUNC = tp.Callable[
-        [config.JobConfig, JobResultSpec, NodeNamespace, NodeId],
-        GraphSection]
+    __JOB_DETAILS = tp.TypeVar(
+        "__JOB_DETAILS",
+        meta.RunModelJob,
+        meta.RunFlowJob,
+        meta.ImportModelJob,
+        meta.ImportDataJob,
+        meta.ExportDataJob)
-    @classmethod
-    def build_job(
-            cls, job_config: config.JobConfig,
-            result_spec: JobResultSpec) -> Graph:
+    __JOB_BUILD_FUNC = tp.Callable[[meta.JobDefinition, NodeId], GraphSection]
-        if job_config.job.jobType == meta.JobType.IMPORT_MODEL:
-            return cls.build_standard_job(job_config, result_spec, cls.build_import_model_job)
+    def __init__(self, job_config: config.JobConfig, result_spec: JobResultSpec):
-        if job_config.job.jobType == meta.JobType.RUN_MODEL:
-            return cls.build_standard_job(job_config, result_spec, cls.build_run_model_job)
+        self._job_config = job_config
+        self._result_spec = result_spec
-        if job_config.job.jobType == meta.JobType.RUN_FLOW:
-            return cls.build_standard_job(job_config, result_spec, cls.build_run_flow_job)
+        self._job_key = _util.object_key(job_config.jobId)
+        self._job_namespace = NodeNamespace(self._job_key)
-        raise _ex.EConfigParse(f"Job type [{job_config.job.jobType}] is not supported yet")
+        self._errors = []
-    @classmethod
-    def build_standard_job(
-            cls, job_config: config.JobConfig, result_spec: JobResultSpec,
-            build_func: __JOB_BUILD_FUNC):
+    def _child_builder(self, job_id: meta.TagHeader) -> "GraphBuilder":
-        # Set up the job context
+        builder = GraphBuilder(self._job_config, JobResultSpec(save_result=False))
+        builder._job_key = _util.object_key(job_id)
+        builder._job_namespace = NodeNamespace(builder._job_key)
+        return builder
+    def build_job(self, job_def: meta.JobDefinition,) -> Graph:
+        try:
+            if job_def.jobType == meta.JobType.IMPORT_MODEL:
+                return self.build_standard_job(job_def, self.build_import_model_job)
+            if job_def.jobType == meta.JobType.RUN_MODEL:
+                return self.build_standard_job(job_def, self.build_run_model_job)
+            if job_def.jobType == meta.JobType.RUN_FLOW:
+                return self.build_standard_job(job_def, self.build_run_flow_job)
+            if job_def.jobType in [meta.JobType.IMPORT_DATA, meta.JobType.EXPORT_DATA]:
+                return self.build_standard_job(job_def, self.build_import_export_data_job)
+            if job_def.jobType == meta.JobType.JOB_GROUP:
+                return self.build_standard_job(job_def, self.build_job_group)
+            self._error(_ex.EJobValidation(f"Job type [{job_def.jobType.name}] is not supported yet"))
+        except Exception as e:
+            # If there are recorded, errors, assume unhandled exceptions are a result of those
+            # Only report the recorded errors, to reduce noise
+            if any(self._errors):
+                pass
+            # If no errors are recorded, an exception here would be a bug
+            raise _ex.ETracInternal(f"Unexpected error preparing the job execution graph") from e
+        finally:
-        job_key = _util.object_key(job_config.jobId)
-        job_namespace = NodeNamespace(job_key)
+            if any(self._errors):
-        push_id = NodeId("trac_job_push", job_namespace, Bundle[tp.Any])
-        push_node = ContextPushNode(push_id, job_namespace)
+                if len(self._errors) == 1:
+                    raise self._errors[0]
+                else:
+                    err_text = "\n".join(map(str, self._errors))
+                    raise _ex.EJobValidation("Invalid job configuration\n" + err_text)
+    def build_standard_job(self, job_def: meta.JobDefinition, build_func: __JOB_BUILD_FUNC):
+        # Set up the job context
+        push_id = NodeId("trac_job_push", self._job_namespace, Bundle[tp.Any])
+        push_node = ContextPushNode(push_id, self._job_namespace)
         push_section = GraphSection({push_id: push_node}, must_run=[push_id])
         # Build the execution graphs for the main job and results recording
-        main_section = build_func(job_config, result_spec, job_namespace, push_id)
-        main_result_id = NodeId.of("trac_build_result", job_namespace, config.JobResult)
+        main_section = build_func(job_def, push_id)
+        main_result_id = NodeId.of("trac_job_result", self._job_namespace, config.JobResult)
         # Clean up the job context
-        global_result_id = NodeId.of(job_key, NodeNamespace.root(), config.JobResult)
+        global_result_id = NodeId.of(self._job_key, NodeNamespace.root(), config.JobResult)
-        pop_id = NodeId("trac_job_pop", job_namespace, Bundle[tp.Any])
+        pop_id = NodeId("trac_job_pop", self._job_namespace, Bundle[tp.Any])
         pop_mapping = {main_result_id: global_result_id}
         pop_node = ContextPopNode(
-            pop_id, job_namespace, pop_mapping,
+            pop_id, self._job_namespace, pop_mapping,
             explicit_deps=main_section.must_run,
             bundle=NodeNamespace.root())
-        global_result_node = BundleItemNode(global_result_id, pop_id, job_key)
+        global_result_node = BundleItemNode(global_result_id, pop_id, self._job_key)
         pop_section = GraphSection({
             pop_id: pop_node,
             global_result_id: global_result_node})
-        job = cls._join_sections(push_section, main_section, pop_section)
+        job = self._join_sections(push_section, main_section, pop_section)
         return Graph(job.nodes, global_result_id)
-    @classmethod
-    def build_import_model_job(
-            cls, job_config: config.JobConfig, result_spec: JobResultSpec,
-            job_namespace: NodeNamespace, job_push_id: NodeId) \
-            -> GraphSection:
+    def build_import_model_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
         # Main section: run the model import
@@ -97,60 +134,142 @@ class GraphBuilder:
         new_model_id = _util.new_object_id(meta.ObjectType.MODEL)
         new_model_key = _util.object_key(new_model_id)
-        model_scope = _util.object_key(job_config.jobId)
-        import_details = job_config.job.importModel
+        model_scope = self._job_key
+        import_details = job_def.importModel
-        import_id = NodeId.of("trac_import_model", job_namespace, meta.ObjectDefinition)
+        import_id = NodeId.of("trac_import_model", self._job_namespace, meta.ObjectDefinition)
         import_node = ImportModelNode(import_id, model_scope, import_details, explicit_deps=[job_push_id])
         main_section = GraphSection(nodes={import_id: import_node})
         # Build job-level metadata outputs
-        result_section = cls.build_job_results(
-            job_config, job_namespace, result_spec,
+        result_section = self.build_job_results(
             objects={new_model_key: import_id},
             explicit_deps=[job_push_id, *main_section.must_run])
-        return cls._join_sections(main_section, result_section)
+        return self._join_sections(main_section, result_section)
-    @classmethod
-    def build_run_model_job(
-            cls, job_config: config.JobConfig, result_spec: JobResultSpec,
-            job_namespace: NodeNamespace, job_push_id: NodeId) \
-            -> GraphSection:
+    def build_import_export_data_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
-        target_selector = job_config.job.runModel.model
-        target_obj = _util.get_job_resource(target_selector, job_config)
+        # TODO: These are processed as regular calculation jobs for now
+        # That might be ok, but is worth reviewing
+        if job_def.jobType == meta.JobType.IMPORT_DATA:
+            job_details = job_def.importData
+        else:
+            job_details = job_def.exportData
+        target_selector = job_details.model
+        target_obj = _util.get_job_resource(target_selector, self._job_config)
         target_def = target_obj.model
-        job_def = job_config.job.runModel
-        return cls.build_calculation_job(
-            job_config, result_spec, job_namespace, job_push_id,
-            target_selector, target_def, job_def)
+        return self.build_calculation_job(
+            job_def, job_push_id,
+            target_selector, target_def,
+            job_details)
-    @classmethod
-    def build_run_flow_job(
-            cls, job_config: config.JobConfig, result_spec: JobResultSpec,
-            job_namespace: NodeNamespace, job_push_id: NodeId) \
-            -> GraphSection:
+    def build_run_model_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
+        job_details = job_def.runModel
+        target_selector = job_details.model
+        target_obj = _util.get_job_resource(target_selector, self._job_config)
+        target_def = target_obj.model
-        target_selector = job_config.job.runFlow.flow
-        target_obj = _util.get_job_resource(target_selector, job_config)
+        return self.build_calculation_job(
+            job_def, job_push_id,
+            target_selector, target_def,
+            job_details)
+    def build_run_flow_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
+        job_details = job_def.runFlow
+        target_selector = job_details.flow
+        target_obj = _util.get_job_resource(target_selector, self._job_config)
         target_def = target_obj.flow
-        job_def = job_config.job.runFlow
-        return cls.build_calculation_job(
-            job_config, result_spec, job_namespace, job_push_id,
-            target_selector, target_def, job_def)
+        return self.build_calculation_job(
+            job_def, job_push_id,
+            target_selector, target_def,
+            job_details)
+    def build_job_group(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
+        job_group = job_def.jobGroup
+        if job_group.jobGroupType == meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
+            return self.build_sequential_job_group(job_group, job_push_id)
+        if job_group.jobGroupType == meta.JobGroupType.PARALLEL_JOB_GROUP:
+            return self.build_parallel_job_group(job_group, job_push_id)
+        else:
+            self._error(_ex.EJobValidation(f"Job group type [{job_group.jobGroupType.name}] is not supported yet"))
+            return GraphSection(dict(), inputs={job_push_id})
+    def build_sequential_job_group(self, job_group: meta.JobGroup, job_push_id: NodeId) -> GraphSection:
+        nodes = dict()
+        prior_id = job_push_id
+        for child_def in job_group.sequential.jobs:
+            child_node = self.build_child_job(child_def, explicit_deps=[prior_id])
+            nodes[child_node.id] = child_node
+            prior_id = child_node.id
+        # No real results from job groups yet (they cannot be executed from the platform)
+        job_result =  cfg.JobResult()
+        result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
+        result_node = StaticValueNode(result_id, job_result, explicit_deps=[prior_id])
+        nodes[result_id] = result_node
+        return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})
+    def build_parallel_job_group(self, job_group: meta.JobGroup, job_push_id: NodeId) -> GraphSection:
+        nodes = dict()
+        parallel_ids = [job_push_id]
+        for child_def in job_group.parallel.jobs:
+            child_node = self.build_child_job(child_def, explicit_deps=[job_push_id])
+            nodes[child_node.id] = child_node
+            parallel_ids.append(child_node.id)
+        # No real results from job groups yet (they cannot be executed from the platform)
+        job_result =  cfg.JobResult()
+        result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
+        result_node = StaticValueNode(result_id, job_result, explicit_deps=parallel_ids)
+        nodes[result_id] = result_node
+        return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})
+    def build_child_job(self, child_job_def: meta.JobDefinition, explicit_deps) -> Node[config.JobResult]:
+        child_job_id = _util.new_object_id(meta.ObjectType.JOB)
+        child_builder = self._child_builder(child_job_id)
+        child_graph = child_builder.build_job(child_job_def)
+        child_node_name = _util.object_key(child_job_id)
+        child_node_id = NodeId.of(child_node_name, self._job_namespace, cfg.JobResult)
+        child_node = ChildJobNode(
+            child_node_id, child_job_id, child_job_def,
+            child_graph, explicit_deps)
+        return child_node
-    @classmethod
     def build_calculation_job(
-            cls, job_config: config.JobConfig, result_spec: JobResultSpec,
-            job_namespace: NodeNamespace, job_push_id: NodeId,
+            self, job_def: meta.JobDefinition, job_push_id: NodeId,
             target_selector: meta.TagSelector,
             target_def: tp.Union[meta.ModelDefinition, meta.FlowDefinition],
-            job_def: tp.Union[meta.RunModelJob, meta.RunFlowJob]) \
+            job_details: __JOB_DETAILS) \
             -> GraphSection:
         # The main execution graph can run directly in the job context, no need to do a context push
@@ -162,29 +281,30 @@ class GraphBuilder:
         required_inputs = target_def.inputs
         required_outputs = target_def.outputs
-        provided_params = job_def.parameters
-        provided_inputs = job_def.inputs
-        provided_outputs = job_def.outputs
+        provided_params = job_details.parameters
+        provided_inputs = job_details.inputs
+        provided_outputs = job_details.outputs
-        params_section = cls.build_job_parameters(
-            job_namespace, required_params, provided_params,
+        params_section = self.build_job_parameters(
+            required_params, provided_params,
             explicit_deps=[job_push_id])
-        input_section = cls.build_job_inputs(
-            job_config, job_namespace, required_inputs, provided_inputs,
+        input_section = self.build_job_inputs(
+            required_inputs, provided_inputs,
             explicit_deps=[job_push_id])
-        exec_obj = _util.get_job_resource(target_selector, job_config)
+        exec_namespace = self._job_namespace
+        exec_obj = _util.get_job_resource(target_selector, self._job_config)
-        exec_section = cls.build_model_or_flow(
-            job_config, job_namespace, exec_obj,
+        exec_section = self.build_model_or_flow(
+            exec_namespace, job_def, exec_obj,
             explicit_deps=[job_push_id])
-        output_section = cls.build_job_outputs(
-            job_config, job_namespace, required_outputs, provided_outputs,
+        output_section = self.build_job_outputs(
+            required_outputs, provided_outputs,
             explicit_deps=[job_push_id])
-        main_section = cls._join_sections(params_section, input_section, exec_section, output_section)
+        main_section = self._join_sections(params_section, input_section, exec_section, output_section)
         # Build job-level metadata outputs
@@ -192,16 +312,14 @@ class GraphBuilder:
             nid for nid, n in main_section.nodes.items()
             if isinstance(n, DataResultNode))
-        result_section = cls.build_job_results(
-            job_config, job_namespace,
-            result_spec, bundles=data_result_ids,
+        result_section = self.build_job_results(
+            bundles=data_result_ids,
             explicit_deps=[job_push_id, *main_section.must_run])
-        return cls._join_sections(main_section, result_section)
+        return self._join_sections(main_section, result_section)
-    @classmethod
     def build_job_parameters(
-            cls, job_namespace: NodeNamespace,
+            self,
             required_params: tp.Dict[str, meta.ModelParameter],
             supplied_params: tp.Dict[str, meta.Value],
             explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
@@ -217,18 +335,18 @@ class GraphBuilder:
                 if param_schema.defaultValue is not None:
                     param_def = param_schema.defaultValue
                 else:
-                    raise _ex.EJobValidation(f"Missing required parameter: [{param_name}]")
+                    self._error(_ex.EJobValidation(f"Missing required parameter: [{param_name}]"))
+                    continue
-            param_id = NodeId(param_name, job_namespace, meta.Value)
+            param_id = NodeId(param_name, self._job_namespace, meta.Value)
             param_node = StaticValueNode(param_id, param_def, explicit_deps=explicit_deps)
             nodes[param_id] = param_node
         return GraphSection(nodes, outputs=set(nodes.keys()), must_run=list(nodes.keys()))
-    @classmethod
     def build_job_inputs(
-            cls, job_config: config.JobConfig, job_namespace: NodeNamespace,
+            self,
             required_inputs: tp.Dict[str, meta.ModelInputSchema],
             supplied_inputs: tp.Dict[str, meta.TagSelector],
             explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
@@ -244,20 +362,21 @@ class GraphBuilder:
             if data_selector is None:
                 if input_schema.optional:
-                    data_view_id = NodeId.of(input_name, job_namespace, _data.DataView)
+                    data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
                     nodes[data_view_id] = StaticValueNode(data_view_id, _data.DataView.create_empty())
                     outputs.add(data_view_id)
                     continue
                 else:
-                    raise _ex.EJobValidation(f"Missing required input: [{input_name}]")
+                    self._error(_ex.EJobValidation(f"Missing required input: [{input_name}]"))
+                    continue
             # Build a data spec using metadata from the job config
             # For now we are always loading the root part, snap 0, delta 0
-            data_def = _util.get_job_resource(data_selector, job_config).data
-            storage_def = _util.get_job_resource(data_def.storageId, job_config).storage
+            data_def = _util.get_job_resource(data_selector, self._job_config).data
+            storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
             if data_def.schemaId:
-                schema_def = _util.get_job_resource(data_def.schemaId, job_config).schema
+                schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
             else:
                 schema_def = data_def.schema
@@ -266,16 +385,16 @@ class GraphBuilder:
             data_spec = _data.DataSpec(data_item, data_def, storage_def, schema_def)
             # Data spec node is static, using the assembled data spec
-            data_spec_id = NodeId.of(f"{input_name}:SPEC", job_namespace, _data.DataSpec)
+            data_spec_id = NodeId.of(f"{input_name}:SPEC", self._job_namespace, _data.DataSpec)
             data_spec_node = StaticValueNode(data_spec_id, data_spec, explicit_deps=explicit_deps)
             # Physical load of data items from disk
             # Currently one item per input, since inputs are single part/delta
-            data_load_id = NodeId.of(f"{input_name}:LOAD", job_namespace, _data.DataItem)
+            data_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
             data_load_node = LoadDataNode(data_load_id, data_spec_id, explicit_deps=explicit_deps)
             # Input views assembled by mapping one root part to each view
-            data_view_id = NodeId.of(input_name, job_namespace, _data.DataView)
+            data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
             data_view_node = DataViewNode(data_view_id, schema_def, data_load_id)
             nodes[data_spec_id] = data_spec_node
@@ -288,9 +407,8 @@ class GraphBuilder:
         return GraphSection(nodes, outputs=outputs, must_run=must_run)
-    @classmethod
     def build_job_outputs(
-            cls, job_config: config.JobConfig, job_namespace: NodeNamespace,
+            self,
             required_outputs: tp.Dict[str, meta.ModelOutputSchema],
             supplied_outputs: tp.Dict[str, meta.TagSelector],
             explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
@@ -306,25 +424,27 @@ class GraphBuilder:
             if data_selector is None:
                 if output_schema.optional:
                     optional_info = "(configuration is required for all optional outputs, in case they are produced)"
-                    raise _ex.EJobValidation(f"Missing optional output: [{output_name}] {optional_info}")
+                    self._error(_ex.EJobValidation(f"Missing optional output: [{output_name}] {optional_info}"))
+                    continue
                 else:
-                    raise _ex.EJobValidation(f"Missing required output: [{output_name}]")
+                    self._error(_ex.EJobValidation(f"Missing required output: [{output_name}]"))
+                    continue
             # Output data view must already exist in the namespace
-            data_view_id = NodeId.of(output_name, job_namespace, _data.DataView)
-            data_spec_id = NodeId.of(f"{output_name}:SPEC", job_namespace, _data.DataSpec)
+            data_view_id = NodeId.of(output_name, self._job_namespace, _data.DataView)
+            data_spec_id = NodeId.of(f"{output_name}:SPEC", self._job_namespace, _data.DataSpec)
-            data_obj = _util.get_job_resource(data_selector, job_config, optional=True)
+            data_obj = _util.get_job_resource(data_selector, self._job_config, optional=True)
             if data_obj is not None:
                 # If data def for the output has been built in advance, use a static data spec
                 data_def = data_obj.data
-                storage_def = _util.get_job_resource(data_def.storageId, job_config).storage
+                storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
                 if data_def.schemaId:
-                    schema_def = _util.get_job_resource(data_def.schemaId, job_config).schema
+                    schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
                 else:
                     schema_def = data_def.schema
@@ -343,19 +463,74 @@ class GraphBuilder:
                 # Dynamic data def will always use an embedded schema (this is no ID for an external schema)
                 data_key = output_name + ":DATA"
-                data_id = job_config.resultMapping[data_key]
+                data_id = self._job_config.resultMapping[data_key]
                 storage_key = output_name + ":STORAGE"
-                storage_id = job_config.resultMapping[storage_key]
+                storage_id = self._job_config.resultMapping[storage_key]
                 data_spec_node = DynamicDataSpecNode(
-                        data_spec_id, data_view_id,
-                        data_id, storage_id,
-                        prior_data_spec=None,
-                        explicit_deps=explicit_deps)
+                    data_spec_id, data_view_id,
+                    data_id, storage_id,
+                    prior_data_spec=None,
+                    explicit_deps=explicit_deps)
                 output_data_key = _util.object_key(data_id)
                 output_storage_key = _util.object_key(storage_id)
+            # Map one data item from each view, since outputs are single part/delta
+            data_item_id = NodeId(f"{output_name}:ITEM", self._job_namespace, _data.DataItem)
+            data_item_node = DataItemNode(data_item_id, data_view_id)
+            # Create a physical save operation for the data item
+            data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, None)
+            data_save_node = SaveDataNode(data_save_id, data_spec_id, data_item_id)
+            data_result_id = NodeId.of(f"{output_name}:RESULT", self._job_namespace, ObjectBundle)
+            data_result_node = DataResultNode(
+                data_result_id, output_name,
+                data_item_id, data_spec_id, data_save_id,
+                output_data_key, output_storage_key)
+            nodes[data_spec_id] = data_spec_node
+            nodes[data_item_id] = data_item_node
+            nodes[data_save_id] = data_save_node
+            nodes[data_result_id] = data_result_node
+            # Job-level data view is an input to the save operation
+            inputs.add(data_view_id)
+        return GraphSection(nodes, inputs=inputs)
+    @classmethod
+    def build_runtime_outputs(cls, output_names: tp.List[str], job_namespace: NodeNamespace):
+        # This method is called dynamically during job execution
+        # So it cannot use stateful information like self._job_config or self._job_namespace
+        # TODO: Factor out common logic with regular job outputs (including static / dynamic)
+        nodes = {}
+        inputs = set()
+        outputs = list()
+        for output_name in output_names:
+            # Output data view must already exist in the namespace
+            data_view_id = NodeId.of(output_name, job_namespace, _data.DataView)
+            data_spec_id = NodeId.of(f"{output_name}:SPEC", job_namespace, _data.DataSpec)
+            data_key = output_name + ":DATA"
+            data_id = _util.new_object_id(meta.ObjectType.DATA)
+            storage_key = output_name + ":STORAGE"
+            storage_id = _util.new_object_id(meta.ObjectType.STORAGE)
+            data_spec_node = DynamicDataSpecNode(
+                data_spec_id, data_view_id,
+                data_id, storage_id,
+                prior_data_spec=None)
+            output_data_key = _util.object_key(data_id)
+            output_storage_key = _util.object_key(storage_id)
             # Map one data item from each view, since outputs are single part/delta
             data_item_id = NodeId(f"{output_name}:ITEM", job_namespace, _data.DataItem)
             data_item_node = DataItemNode(data_item_id, data_view_id)
@@ -377,42 +552,49 @@ class GraphBuilder:
             # Job-level data view is an input to the save operation
             inputs.add(data_view_id)
+            outputs.append(data_result_id)
-        return GraphSection(nodes, inputs=inputs)
+        runtime_outputs = JobOutputs(bundles=outputs)
+        runtime_outputs_id = NodeId.of("trac_runtime_outputs", job_namespace, JobOutputs)
+        runtime_outputs_node = RuntimeOutputsNode(runtime_outputs_id, runtime_outputs)
+        nodes[runtime_outputs_id] = runtime_outputs_node
+        return GraphSection(nodes, inputs=inputs, outputs={runtime_outputs_id})
-    @classmethod
     def build_job_results(
-            cls, job_config: cfg.JobConfig, job_namespace: NodeNamespace, result_spec: JobResultSpec,
+            self,
             objects: tp.Dict[str, NodeId[meta.ObjectDefinition]] = None,
             bundles: tp.List[NodeId[ObjectBundle]] = None,
             explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
             -> GraphSection:
-        build_result_id = NodeId.of("trac_build_result", job_namespace, cfg.JobResult)
+        build_result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
         if objects is not None:
             results_inputs = set(objects.values())
             build_result_node = BuildJobResultNode(
-                build_result_id, job_config.jobId,
-                objects=objects, explicit_deps=explicit_deps)
+                build_result_id, self._job_config.jobId,
+                outputs = JobOutputs(objects=objects),
+                explicit_deps=explicit_deps)
         elif bundles is not None:
             results_inputs = set(bundles)
             build_result_node = BuildJobResultNode(
-                build_result_id, job_config.jobId,
-                bundles=bundles, explicit_deps=explicit_deps)
+                build_result_id, self._job_config.jobId,
+                outputs = JobOutputs(bundles=bundles),
+                explicit_deps=explicit_deps)
         else:
             raise _ex.EUnexpected()
-        save_result_id = NodeId("trac_save_result", job_namespace)
-        save_result_node = SaveJobResultNode(save_result_id, build_result_id, result_spec)
-        if result_spec.save_result:
+        if self._result_spec.save_result:
+            save_result_id = NodeId("trac_save_result", self._job_namespace)
+            save_result_node = SaveJobResultNode(save_result_id, build_result_id, self._result_spec)
             result_nodes = {build_result_id: build_result_node, save_result_id: save_result_node}
             job_result_id = save_result_id
         else:
@@ -421,10 +603,9 @@ class GraphBuilder:
         return GraphSection(result_nodes, inputs=results_inputs, must_run=[job_result_id])
-    @classmethod
     def build_model_or_flow_with_context(
-            cls, job_config: config.JobConfig, namespace: NodeNamespace,
-            model_or_flow_name: str, model_or_flow: meta.ObjectDefinition,
+            self, namespace: NodeNamespace, model_or_flow_name: str,
+            job_def: meta.JobDefinition, model_or_flow: meta.ObjectDefinition,
             input_mapping: tp.Dict[str, NodeId], output_mapping: tp.Dict[str, NodeId],
             explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
             -> GraphSection:
@@ -437,43 +618,46 @@ class GraphBuilder:
         # Execute in the sub-context by doing PUSH, EXEC, POP
         # Note that POP node must be in the sub namespace too
-        push_section = cls.build_context_push(
+        push_section = self.build_context_push(
             sub_namespace, input_mapping,
             explicit_deps)
-        exec_section = cls.build_model_or_flow(
-            job_config, sub_namespace, model_or_flow,
+        exec_section = self.build_model_or_flow(
+            sub_namespace, job_def, model_or_flow,
             explicit_deps=push_section.must_run)
-        pop_section = cls.build_context_pop(
+        pop_section = self.build_context_pop(
             sub_namespace, output_mapping,
             explicit_deps=exec_section.must_run)
-        return cls._join_sections(push_section, exec_section, pop_section)
+        return self._join_sections(push_section, exec_section, pop_section)
-    @classmethod
     def build_model_or_flow(
-            cls, job_config: config.JobConfig, namespace: NodeNamespace,
+            self, namespace: NodeNamespace,
+            job_def: meta.JobDefinition,
             model_or_flow: meta.ObjectDefinition,
             explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
             -> GraphSection:
         if model_or_flow.objectType == meta.ObjectType.MODEL:
-            return cls.build_model(namespace, model_or_flow.model, explicit_deps)
+            return self.build_model(namespace, job_def, model_or_flow.model, explicit_deps)
         elif model_or_flow.objectType == meta.ObjectType.FLOW:
-            return cls.build_flow(job_config, namespace, model_or_flow.flow)
+            return self.build_flow(namespace, job_def, model_or_flow.flow)
         else:
-            raise _ex.EConfigParse("Invalid job config given to the execution engine")
+            message = f"Invalid job config, expected model or flow, got [{model_or_flow.objectType}]"
+            self._error(_ex.EJobValidation(message))
-    @classmethod
     def build_model(
-            cls, namespace: NodeNamespace,
+            self, namespace: NodeNamespace,
+            job_def: meta.JobDefinition,
             model_def: meta.ModelDefinition,
             explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
             -> GraphSection:
+        self.check_model_type(job_def, model_def)
         def param_id(node_name):
             return NodeId(node_name, namespace, meta.Value)
@@ -485,6 +669,14 @@ class GraphBuilder:
         input_ids = set(map(data_id, model_def.inputs))
         output_ids = set(map(data_id, model_def.outputs))
+        # Set up storage access for import / export data jobs
+        if job_def.jobType == meta.JobType.IMPORT_DATA:
+            storage_access = job_def.importData.storageAccess
+        elif job_def.jobType == meta.JobType.EXPORT_DATA:
+            storage_access = job_def.exportData.storageAccess
+        else:
+            storage_access = None
         # Create the model node
         # Always add the prior graph root ID as a dependency
         # This is to ensure dependencies are still pulled in for models with no inputs!
@@ -500,7 +692,8 @@ class GraphBuilder:
         model_node = RunModelNode(
             model_id, model_scope, model_def,
             frozenset(parameter_ids), frozenset(input_ids),
-            explicit_deps=explicit_deps, bundle=model_id.namespace)
+            explicit_deps=explicit_deps, bundle=model_id.namespace,
+            storage_access=storage_access)
         model_result_id = NodeId(f"{model_name}:RESULT", namespace)
         model_result_node = RunModelResultNode(model_result_id, model_id)
@@ -520,9 +713,9 @@ class GraphBuilder:
         # Assemble a graph to include the model and its outputs
         return GraphSection(nodes, inputs={*parameter_ids, *input_ids}, outputs=output_ids, must_run=[model_result_id])
-    @classmethod
     def build_flow(
-            cls, job_config: config.JobConfig, namespace: NodeNamespace,
+            self, namespace: NodeNamespace,
+            job_def: meta.JobDefinition,
             flow_def: meta.FlowDefinition,
             explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
             -> GraphSection:
@@ -555,11 +748,11 @@ class GraphBuilder:
             node_name, node = reachable_nodes.popitem()
-            sub_section = cls.build_flow_node(
-                job_config, namespace, target_edges,
+            sub_section = self.build_flow_node(
+                namespace, job_def, target_edges,
                 node_name, node, explicit_deps)
-            graph_section = cls._join_sections(graph_section, sub_section, allow_partial_inputs=True)
+            graph_section = self._join_sections(graph_section, sub_section, allow_partial_inputs=True)
             if node.nodeType != meta.FlowNodeType.OUTPUT_NODE:
@@ -579,20 +772,18 @@ class GraphBuilder:
             missing_targets = [edge.target for node in remaining_edges_by_target.values() for edge in node]
             missing_target_names = [f"{t.node}.{t.socket}" if t.socket else t.node for t in missing_targets]
             missing_nodes = list(map(lambda n: NodeId(n, namespace), missing_target_names))
-            cls._invalid_graph_error(missing_nodes)
+            self._invalid_graph_error(missing_nodes)
         return graph_section
-    @classmethod
     def build_flow_node(
-            cls, job_config: config.JobConfig, namespace: NodeNamespace,
+            self, namespace: NodeNamespace,
+            job_def: meta.JobDefinition,
             target_edges: tp.Dict[meta.FlowSocket, meta.FlowEdge],
             node_name: str, node: meta.FlowNode,
             explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
             -> GraphSection:
-        flow_job = job_config.job.runFlow
         def socket_key(socket):
             return f"{socket.node}.{socket.socket}" if socket.socket else socket.node
@@ -605,7 +796,7 @@ class GraphBuilder:
             edge = target_edges.get(socket)
             # Report missing edges as a job consistency error (this might happen sometimes in dev mode)
             if edge is None:
-                raise _ex.EJobValidation(f"Inconsistent flow: Socket [{socket}] is not connected")
+                self._error(_ex.EJobValidation(f"Inconsistent flow: Socket [{socket}] is not connected"))
             return socket_id(edge.source.node, edge.source.socket, result_type)
         if node.nodeType == meta.FlowNodeType.PARAMETER_NODE:
@@ -628,27 +819,28 @@ class GraphBuilder:
             push_mapping = {**input_mapping, **param_mapping}
             pop_mapping = output_mapping
-            model_selector = flow_job.models.get(node_name)
-            model_obj = _util.get_job_resource(model_selector, job_config)
+            model_selector = job_def.runFlow.models.get(node_name)
+            model_obj = _util.get_job_resource(model_selector, self._job_config)
             # Missing models in the job config is a job consistency error
             if model_obj is None or model_obj.objectType != meta.ObjectType.MODEL:
-                raise _ex.EJobValidation(f"No model was provided for flow node [{node_name}]")
+                self._error(_ex.EJobValidation(f"No model was provided for flow node [{node_name}]"))
             # Explicit check for model compatibility - report an error now, do not try build_model()
-            cls.check_model_compatibility(model_selector, model_obj.model, node_name, node)
+            self.check_model_compatibility(model_selector, model_obj.model, node_name, node)
+            self.check_model_type(job_def, model_obj.model)
-            return cls.build_model_or_flow_with_context(
-                job_config, namespace, node_name, model_obj,
-                push_mapping, pop_mapping, explicit_deps)
+            return self.build_model_or_flow_with_context(
+                namespace, node_name,
+                job_def, model_obj,
+                push_mapping, pop_mapping,
+                explicit_deps)
-        # Missing / invalid node type - should be caught in static validation
-        raise _ex.ETracInternal(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]")
+        self._error(_ex.EJobValidation(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]"))
-    @classmethod
     def check_model_compatibility(
-            cls, model_selector: meta.TagSelector, model_def: meta.ModelDefinition,
-            node_name: str, flow_node: meta.FlowNode):
+            self, model_selector: meta.TagSelector,
+            model_def: meta.ModelDefinition, node_name: str, flow_node: meta.FlowNode):
         model_params = list(sorted(model_def.parameters.keys()))
         model_inputs = list(sorted(model_def.inputs.keys()))
@@ -660,7 +852,21 @@ class GraphBuilder:
         if model_params != node_params or model_inputs != node_inputs or model_outputs != node_outputs:
             model_key = _util.object_key(model_selector)
-            raise _ex.EJobValidation(f"Incompatible model for flow node [{node_name}] (Model: [{model_key}])")
+            self._error(_ex.EJobValidation(f"Incompatible model for flow node [{node_name}] (Model: [{model_key}])"))
+    def check_model_type(self, job_def: meta.JobDefinition, model_def: meta.ModelDefinition):
+        if job_def.jobType == meta.JobType.IMPORT_DATA:
+            allowed_model_types = [meta.ModelType.DATA_IMPORT_MODEL]
+        elif job_def.jobType == meta.JobType.EXPORT_DATA:
+            allowed_model_types = [meta.ModelType.DATA_EXPORT_MODEL]
+        else:
+            allowed_model_types = [meta.ModelType.STANDARD_MODEL]
+        if model_def.modelType not in allowed_model_types:
+            job_type = job_def.jobType.name
+            model_type = model_def.modelType.name
+            self._error(_ex.EJobValidation(f"Job type [{job_type}] cannot use model type [{model_type}]"))
     @staticmethod
     def build_context_push(
@@ -722,8 +928,7 @@ class GraphBuilder:
             outputs={*pop_mapping.values()},
             must_run=[pop_id])
-    @classmethod
-    def _join_sections(cls, *sections: GraphSection, allow_partial_inputs: bool = False):
+    def _join_sections(self, *sections: GraphSection, allow_partial_inputs: bool = False):
         n_sections = len(sections)
         first_section = sections[0]
@@ -745,7 +950,7 @@ class GraphBuilder:
                 if allow_partial_inputs:
                     inputs.update(requirements_not_met)
                 else:
-                    cls._invalid_graph_error(requirements_not_met)
+                    self._invalid_graph_error(requirements_not_met)
             nodes.update(current_section.nodes)
@@ -754,13 +959,12 @@ class GraphBuilder:
         return GraphSection(nodes, inputs, last_section.outputs, must_run)
-    @classmethod
-    def _invalid_graph_error(cls, missing_dependencies: tp.Iterable[NodeId]):
+    def _invalid_graph_error(self, missing_dependencies: tp.Iterable[NodeId]):
-        missing_ids = ", ".join(map(cls._missing_item_display_name, missing_dependencies))
-        message = f"Invalid job config: The execution graph has unsatisfied dependencies: [{missing_ids}]"
+        missing_ids = ", ".join(map(self._missing_item_display_name, missing_dependencies))
+        message = f"The execution graph has unsatisfied dependencies: [{missing_ids}]"
-        raise _ex.EJobValidation(message)
+        self._error(_ex.EJobValidation(message))
     @classmethod
     def _missing_item_display_name(cls, node_id: NodeId):
@@ -775,3 +979,7 @@ class GraphBuilder:
             return node_id.name
         else:
             return f"{node_id.name} / {', '.join(components[:-1])}"
+    def _error(self, error: Exception):
+        self._errors.append(error)

tracdap-runtime 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl

tracdap-runtime 0.6.4py3-none-any.whl → 0.6.6py3-none-any.whl