tracdap-runtime 0.6.5__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. tracdap/rt/__init__.py +6 -5
  2. tracdap/rt/_exec/actors.py +6 -5
  3. tracdap/rt/_exec/context.py +278 -110
  4. tracdap/rt/_exec/dev_mode.py +237 -143
  5. tracdap/rt/_exec/engine.py +223 -64
  6. tracdap/rt/_exec/functions.py +31 -6
  7. tracdap/rt/_exec/graph.py +15 -5
  8. tracdap/rt/_exec/graph_builder.py +301 -203
  9. tracdap/rt/_exec/runtime.py +13 -10
  10. tracdap/rt/_exec/server.py +6 -5
  11. tracdap/rt/_impl/__init__.py +6 -5
  12. tracdap/rt/_impl/config_parser.py +17 -9
  13. tracdap/rt/_impl/data.py +284 -172
  14. tracdap/rt/_impl/ext/__init__.py +14 -0
  15. tracdap/rt/_impl/ext/sql.py +117 -0
  16. tracdap/rt/_impl/ext/storage.py +58 -0
  17. tracdap/rt/_impl/grpc/__init__.py +6 -5
  18. tracdap/rt/_impl/grpc/codec.py +6 -5
  19. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +62 -54
  20. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +37 -2
  21. tracdap/rt/_impl/guard_rails.py +6 -5
  22. tracdap/rt/_impl/models.py +6 -5
  23. tracdap/rt/_impl/repos.py +6 -5
  24. tracdap/rt/_impl/schemas.py +6 -5
  25. tracdap/rt/_impl/shim.py +6 -5
  26. tracdap/rt/_impl/static_api.py +30 -16
  27. tracdap/rt/_impl/storage.py +8 -7
  28. tracdap/rt/_impl/type_system.py +6 -5
  29. tracdap/rt/_impl/util.py +16 -5
  30. tracdap/rt/_impl/validation.py +72 -18
  31. tracdap/rt/_plugins/__init__.py +6 -5
  32. tracdap/rt/_plugins/_helpers.py +6 -5
  33. tracdap/rt/_plugins/config_local.py +6 -5
  34. tracdap/rt/_plugins/format_arrow.py +6 -5
  35. tracdap/rt/_plugins/format_csv.py +6 -5
  36. tracdap/rt/_plugins/format_parquet.py +6 -5
  37. tracdap/rt/_plugins/repo_git.py +6 -5
  38. tracdap/rt/_plugins/repo_local.py +6 -5
  39. tracdap/rt/_plugins/repo_pypi.py +6 -5
  40. tracdap/rt/_plugins/storage_aws.py +6 -5
  41. tracdap/rt/_plugins/storage_azure.py +6 -5
  42. tracdap/rt/_plugins/storage_gcp.py +6 -5
  43. tracdap/rt/_plugins/storage_local.py +6 -5
  44. tracdap/rt/_plugins/storage_sql.py +418 -0
  45. tracdap/rt/_plugins/storage_sql_dialects.py +118 -0
  46. tracdap/rt/_version.py +7 -6
  47. tracdap/rt/api/__init__.py +23 -5
  48. tracdap/rt/api/experimental.py +85 -37
  49. tracdap/rt/api/hook.py +16 -5
  50. tracdap/rt/api/model_api.py +110 -90
  51. tracdap/rt/api/static_api.py +142 -100
  52. tracdap/rt/config/common.py +26 -27
  53. tracdap/rt/config/job.py +5 -6
  54. tracdap/rt/config/platform.py +41 -42
  55. tracdap/rt/config/result.py +5 -6
  56. tracdap/rt/config/runtime.py +6 -7
  57. tracdap/rt/exceptions.py +13 -7
  58. tracdap/rt/ext/__init__.py +6 -5
  59. tracdap/rt/ext/config.py +6 -5
  60. tracdap/rt/ext/embed.py +6 -5
  61. tracdap/rt/ext/plugins.py +6 -5
  62. tracdap/rt/ext/repos.py +6 -5
  63. tracdap/rt/ext/storage.py +6 -5
  64. tracdap/rt/launch/__init__.py +10 -5
  65. tracdap/rt/launch/__main__.py +6 -5
  66. tracdap/rt/launch/cli.py +6 -5
  67. tracdap/rt/launch/launch.py +38 -15
  68. tracdap/rt/metadata/__init__.py +4 -0
  69. tracdap/rt/metadata/common.py +2 -3
  70. tracdap/rt/metadata/custom.py +3 -4
  71. tracdap/rt/metadata/data.py +30 -31
  72. tracdap/rt/metadata/file.py +6 -7
  73. tracdap/rt/metadata/flow.py +22 -23
  74. tracdap/rt/metadata/job.py +89 -45
  75. tracdap/rt/metadata/model.py +26 -27
  76. tracdap/rt/metadata/object.py +11 -12
  77. tracdap/rt/metadata/object_id.py +23 -24
  78. tracdap/rt/metadata/resource.py +0 -1
  79. tracdap/rt/metadata/search.py +15 -16
  80. tracdap/rt/metadata/stoarge.py +22 -23
  81. tracdap/rt/metadata/tag.py +8 -9
  82. tracdap/rt/metadata/tag_update.py +11 -12
  83. tracdap/rt/metadata/type.py +38 -38
  84. {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0.dist-info}/LICENSE +1 -1
  85. {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0.dist-info}/METADATA +4 -2
  86. tracdap_runtime-0.7.0.dist-info/RECORD +121 -0
  87. {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0.dist-info}/WHEEL +1 -1
  88. tracdap_runtime-0.6.5.dist-info/RECORD +0 -116
  89. {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,9 @@
1
- # Copyright 2022 Accenture Global Solutions Limited
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
1
+ # Licensed to the Fintech Open Source Foundation (FINOS) under one or
2
+ # more contributor license agreements. See the NOTICE file distributed
3
+ # with this work for additional information regarding copyright ownership.
4
+ # FINOS licenses this file to you under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with the
6
+ # License. You may obtain a copy of the License at
6
7
  #
7
8
  # http://www.apache.org/licenses/LICENSE-2.0
8
9
  #
@@ -22,75 +23,111 @@ from .graph import *
22
23
 
23
24
  class GraphBuilder:
24
25
 
25
- __JOB_BUILD_FUNC = tp.Callable[
26
- [config.JobConfig, JobResultSpec, NodeNamespace, NodeId],
27
- GraphSection]
26
+ __JOB_DETAILS = tp.TypeVar(
27
+ "__JOB_DETAILS",
28
+ meta.RunModelJob,
29
+ meta.RunFlowJob,
30
+ meta.ImportModelJob,
31
+ meta.ImportDataJob,
32
+ meta.ExportDataJob)
28
33
 
29
- @classmethod
30
- def build_job(
31
- cls, job_config: config.JobConfig,
32
- result_spec: JobResultSpec) -> Graph:
34
+ __JOB_BUILD_FUNC = tp.Callable[[meta.JobDefinition, NodeId], GraphSection]
33
35
 
34
- if job_config.job.jobType == meta.JobType.IMPORT_MODEL:
35
- return cls.build_standard_job(job_config, result_spec, cls.build_import_model_job)
36
+ def __init__(self, job_config: config.JobConfig, result_spec: JobResultSpec):
36
37
 
37
- if job_config.job.jobType == meta.JobType.RUN_MODEL:
38
- return cls.build_standard_job(job_config, result_spec, cls.build_run_model_job)
38
+ self._job_config = job_config
39
+ self._result_spec = result_spec
39
40
 
40
- if job_config.job.jobType == meta.JobType.RUN_FLOW:
41
- return cls.build_standard_job(job_config, result_spec, cls.build_run_flow_job)
41
+ self._job_key = _util.object_key(job_config.jobId)
42
+ self._job_namespace = NodeNamespace(self._job_key)
42
43
 
43
- if job_config.job.jobType in [meta.JobType.IMPORT_DATA, meta.JobType.EXPORT_DATA]:
44
- return cls.build_standard_job(job_config, result_spec, cls.build_import_export_data_job)
44
+ self._errors = []
45
45
 
46
- raise _ex.EConfigParse(f"Job type [{job_config.job.jobType}] is not supported yet")
46
+ def _child_builder(self, job_id: meta.TagHeader) -> "GraphBuilder":
47
47
 
48
- @classmethod
49
- def build_standard_job(
50
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
51
- build_func: __JOB_BUILD_FUNC):
48
+ builder = GraphBuilder(self._job_config, JobResultSpec(save_result=False))
49
+ builder._job_key = _util.object_key(job_id)
50
+ builder._job_namespace = NodeNamespace(builder._job_key)
52
51
 
53
- # Set up the job context
52
+ return builder
53
+
54
+ def build_job(self, job_def: meta.JobDefinition,) -> Graph:
55
+
56
+ try:
57
+
58
+ if job_def.jobType == meta.JobType.IMPORT_MODEL:
59
+ return self.build_standard_job(job_def, self.build_import_model_job)
60
+
61
+ if job_def.jobType == meta.JobType.RUN_MODEL:
62
+ return self.build_standard_job(job_def, self.build_run_model_job)
63
+
64
+ if job_def.jobType == meta.JobType.RUN_FLOW:
65
+ return self.build_standard_job(job_def, self.build_run_flow_job)
66
+
67
+ if job_def.jobType in [meta.JobType.IMPORT_DATA, meta.JobType.EXPORT_DATA]:
68
+ return self.build_standard_job(job_def, self.build_import_export_data_job)
69
+
70
+ if job_def.jobType == meta.JobType.JOB_GROUP:
71
+ return self.build_standard_job(job_def, self.build_job_group)
54
72
 
55
- job_key = _util.object_key(job_config.jobId)
56
- job_namespace = NodeNamespace(job_key)
73
+ self._error(_ex.EJobValidation(f"Job type [{job_def.jobType.name}] is not supported yet"))
57
74
 
58
- push_id = NodeId("trac_job_push", job_namespace, Bundle[tp.Any])
59
- push_node = ContextPushNode(push_id, job_namespace)
75
+ except Exception as e:
76
+
77
+ # If there are recorded errors, assume unhandled exceptions are a result of those
78
+ # Only report the recorded errors, to reduce noise
79
+ if any(self._errors):
80
+ pass
81
+
82
+ # If no errors are recorded, an exception here would be a bug
83
+ raise _ex.ETracInternal(f"Unexpected error preparing the job execution graph") from e
84
+
85
+ finally:
86
+
87
+ if any(self._errors):
88
+
89
+ if len(self._errors) == 1:
90
+ raise self._errors[0]
91
+ else:
92
+ err_text = "\n".join(map(str, self._errors))
93
+ raise _ex.EJobValidation("Invalid job configuration\n" + err_text)
94
+
95
+ def build_standard_job(self, job_def: meta.JobDefinition, build_func: __JOB_BUILD_FUNC):
96
+
97
+ # Set up the job context
98
+
99
+ push_id = NodeId("trac_job_push", self._job_namespace, Bundle[tp.Any])
100
+ push_node = ContextPushNode(push_id, self._job_namespace)
60
101
  push_section = GraphSection({push_id: push_node}, must_run=[push_id])
61
102
 
62
103
  # Build the execution graphs for the main job and results recording
63
104
 
64
- main_section = build_func(job_config, result_spec, job_namespace, push_id)
65
- main_result_id = NodeId.of("trac_build_result", job_namespace, config.JobResult)
105
+ main_section = build_func(job_def, push_id)
106
+ main_result_id = NodeId.of("trac_job_result", self._job_namespace, config.JobResult)
66
107
 
67
108
  # Clean up the job context
68
109
 
69
- global_result_id = NodeId.of(job_key, NodeNamespace.root(), config.JobResult)
110
+ global_result_id = NodeId.of(self._job_key, NodeNamespace.root(), config.JobResult)
70
111
 
71
- pop_id = NodeId("trac_job_pop", job_namespace, Bundle[tp.Any])
112
+ pop_id = NodeId("trac_job_pop", self._job_namespace, Bundle[tp.Any])
72
113
  pop_mapping = {main_result_id: global_result_id}
73
114
 
74
115
  pop_node = ContextPopNode(
75
- pop_id, job_namespace, pop_mapping,
116
+ pop_id, self._job_namespace, pop_mapping,
76
117
  explicit_deps=main_section.must_run,
77
118
  bundle=NodeNamespace.root())
78
119
 
79
- global_result_node = BundleItemNode(global_result_id, pop_id, job_key)
120
+ global_result_node = BundleItemNode(global_result_id, pop_id, self._job_key)
80
121
 
81
122
  pop_section = GraphSection({
82
123
  pop_id: pop_node,
83
124
  global_result_id: global_result_node})
84
125
 
85
- job = cls._join_sections(push_section, main_section, pop_section)
126
+ job = self._join_sections(push_section, main_section, pop_section)
86
127
 
87
128
  return Graph(job.nodes, global_result_id)
88
129
 
89
- @classmethod
90
- def build_import_model_job(
91
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
92
- job_namespace: NodeNamespace, job_push_id: NodeId) \
93
- -> GraphSection:
130
+ def build_import_model_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
94
131
 
95
132
  # Main section: run the model import
96
133
 
@@ -98,82 +135,142 @@ class GraphBuilder:
98
135
  new_model_id = _util.new_object_id(meta.ObjectType.MODEL)
99
136
  new_model_key = _util.object_key(new_model_id)
100
137
 
101
- model_scope = _util.object_key(job_config.jobId)
102
- import_details = job_config.job.importModel
138
+ model_scope = self._job_key
139
+ import_details = job_def.importModel
103
140
 
104
- import_id = NodeId.of("trac_import_model", job_namespace, meta.ObjectDefinition)
141
+ import_id = NodeId.of("trac_import_model", self._job_namespace, meta.ObjectDefinition)
105
142
  import_node = ImportModelNode(import_id, model_scope, import_details, explicit_deps=[job_push_id])
106
143
 
107
144
  main_section = GraphSection(nodes={import_id: import_node})
108
145
 
109
146
  # Build job-level metadata outputs
110
147
 
111
- result_section = cls.build_job_results(
112
- job_config, job_namespace, result_spec,
148
+ result_section = self.build_job_results(
113
149
  objects={new_model_key: import_id},
114
150
  explicit_deps=[job_push_id, *main_section.must_run])
115
151
 
116
- return cls._join_sections(main_section, result_section)
152
+ return self._join_sections(main_section, result_section)
117
153
 
118
- @classmethod
119
- def build_import_export_data_job(
120
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
121
- job_namespace: NodeNamespace, job_push_id: NodeId) \
122
- -> GraphSection:
154
+ def build_import_export_data_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
123
155
 
124
156
  # TODO: These are processed as regular calculation jobs for now
125
157
  # That might be ok, but is worth reviewing
126
158
 
127
- if job_config.job.jobType == meta.JobType.IMPORT_DATA:
128
- job_def = job_config.job.importData
159
+ if job_def.jobType == meta.JobType.IMPORT_DATA:
160
+ job_details = job_def.importData
129
161
  else:
130
- job_def = job_config.job.exportData
162
+ job_details = job_def.exportData
131
163
 
132
- target_selector = job_def.model
133
- target_obj = _util.get_job_resource(target_selector, job_config)
164
+ target_selector = job_details.model
165
+ target_obj = _util.get_job_resource(target_selector, self._job_config)
134
166
  target_def = target_obj.model
135
167
 
136
- return cls.build_calculation_job(
137
- job_config, result_spec, job_namespace, job_push_id,
138
- target_selector, target_def, job_def)
168
+ return self.build_calculation_job(
169
+ job_def, job_push_id,
170
+ target_selector, target_def,
171
+ job_details)
139
172
 
140
- @classmethod
141
- def build_run_model_job(
142
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
143
- job_namespace: NodeNamespace, job_push_id: NodeId) \
144
- -> GraphSection:
173
+ def build_run_model_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
174
+
175
+ job_details = job_def.runModel
145
176
 
146
- target_selector = job_config.job.runModel.model
147
- target_obj = _util.get_job_resource(target_selector, job_config)
177
+ target_selector = job_details.model
178
+ target_obj = _util.get_job_resource(target_selector, self._job_config)
148
179
  target_def = target_obj.model
149
- job_def = job_config.job.runModel
150
180
 
151
- return cls.build_calculation_job(
152
- job_config, result_spec, job_namespace, job_push_id,
153
- target_selector, target_def, job_def)
181
+ return self.build_calculation_job(
182
+ job_def, job_push_id,
183
+ target_selector, target_def,
184
+ job_details)
154
185
 
155
- @classmethod
156
- def build_run_flow_job(
157
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
158
- job_namespace: NodeNamespace, job_push_id: NodeId) \
159
- -> GraphSection:
186
+ def build_run_flow_job(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
187
+
188
+ job_details = job_def.runFlow
160
189
 
161
- target_selector = job_config.job.runFlow.flow
162
- target_obj = _util.get_job_resource(target_selector, job_config)
190
+ target_selector = job_details.flow
191
+ target_obj = _util.get_job_resource(target_selector, self._job_config)
163
192
  target_def = target_obj.flow
164
- job_def = job_config.job.runFlow
165
193
 
166
- return cls.build_calculation_job(
167
- job_config, result_spec, job_namespace, job_push_id,
168
- target_selector, target_def, job_def)
194
+ return self.build_calculation_job(
195
+ job_def, job_push_id,
196
+ target_selector, target_def,
197
+ job_details)
198
+
199
+ def build_job_group(self, job_def: meta.JobDefinition, job_push_id: NodeId) -> GraphSection:
200
+
201
+ job_group = job_def.jobGroup
202
+
203
+ if job_group.jobGroupType == meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
204
+ return self.build_sequential_job_group(job_group, job_push_id)
205
+
206
+ if job_group.jobGroupType == meta.JobGroupType.PARALLEL_JOB_GROUP:
207
+ return self.build_parallel_job_group(job_group, job_push_id)
208
+
209
+ else:
210
+ self._error(_ex.EJobValidation(f"Job group type [{job_group.jobGroupType.name}] is not supported yet"))
211
+ return GraphSection(dict(), inputs={job_push_id})
212
+
213
+ def build_sequential_job_group(self, job_group: meta.JobGroup, job_push_id: NodeId) -> GraphSection:
214
+
215
+ nodes = dict()
216
+ prior_id = job_push_id
217
+
218
+ for child_def in job_group.sequential.jobs:
219
+
220
+ child_node = self.build_child_job(child_def, explicit_deps=[prior_id])
221
+ nodes[child_node.id] = child_node
222
+
223
+ prior_id = child_node.id
224
+
225
+ # No real results from job groups yet (they cannot be executed from the platform)
226
+ job_result = cfg.JobResult()
227
+ result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
228
+ result_node = StaticValueNode(result_id, job_result, explicit_deps=[prior_id])
229
+ nodes[result_id] = result_node
230
+
231
+ return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})
232
+
233
+ def build_parallel_job_group(self, job_group: meta.JobGroup, job_push_id: NodeId) -> GraphSection:
234
+
235
+ nodes = dict()
236
+ parallel_ids = [job_push_id]
237
+
238
+ for child_def in job_group.parallel.jobs:
239
+
240
+ child_node = self.build_child_job(child_def, explicit_deps=[job_push_id])
241
+ nodes[child_node.id] = child_node
242
+
243
+ parallel_ids.append(child_node.id)
244
+
245
+ # No real results from job groups yet (they cannot be executed from the platform)
246
+ job_result = cfg.JobResult()
247
+ result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
248
+ result_node = StaticValueNode(result_id, job_result, explicit_deps=parallel_ids)
249
+ nodes[result_id] = result_node
250
+
251
+ return GraphSection(nodes, inputs={job_push_id}, outputs={result_id})
252
+
253
+ def build_child_job(self, child_job_def: meta.JobDefinition, explicit_deps) -> Node[config.JobResult]:
254
+
255
+ child_job_id = _util.new_object_id(meta.ObjectType.JOB)
256
+
257
+ child_builder = self._child_builder(child_job_id)
258
+ child_graph = child_builder.build_job(child_job_def)
259
+
260
+ child_node_name = _util.object_key(child_job_id)
261
+ child_node_id = NodeId.of(child_node_name, self._job_namespace, cfg.JobResult)
262
+
263
+ child_node = ChildJobNode(
264
+ child_node_id, child_job_id, child_job_def,
265
+ child_graph, explicit_deps)
266
+
267
+ return child_node
169
268
 
170
- @classmethod
171
269
  def build_calculation_job(
172
- cls, job_config: config.JobConfig, result_spec: JobResultSpec,
173
- job_namespace: NodeNamespace, job_push_id: NodeId,
270
+ self, job_def: meta.JobDefinition, job_push_id: NodeId,
174
271
  target_selector: meta.TagSelector,
175
272
  target_def: tp.Union[meta.ModelDefinition, meta.FlowDefinition],
176
- job_def: tp.Union[meta.RunModelJob, meta.RunFlowJob]) \
273
+ job_details: __JOB_DETAILS) \
177
274
  -> GraphSection:
178
275
 
179
276
  # The main execution graph can run directly in the job context, no need to do a context push
@@ -185,29 +282,30 @@ class GraphBuilder:
185
282
  required_inputs = target_def.inputs
186
283
  required_outputs = target_def.outputs
187
284
 
188
- provided_params = job_def.parameters
189
- provided_inputs = job_def.inputs
190
- provided_outputs = job_def.outputs
285
+ provided_params = job_details.parameters
286
+ provided_inputs = job_details.inputs
287
+ provided_outputs = job_details.outputs
191
288
 
192
- params_section = cls.build_job_parameters(
193
- job_namespace, required_params, provided_params,
289
+ params_section = self.build_job_parameters(
290
+ required_params, provided_params,
194
291
  explicit_deps=[job_push_id])
195
292
 
196
- input_section = cls.build_job_inputs(
197
- job_config, job_namespace, required_inputs, provided_inputs,
293
+ input_section = self.build_job_inputs(
294
+ required_inputs, provided_inputs,
198
295
  explicit_deps=[job_push_id])
199
296
 
200
- exec_obj = _util.get_job_resource(target_selector, job_config)
297
+ exec_namespace = self._job_namespace
298
+ exec_obj = _util.get_job_resource(target_selector, self._job_config)
201
299
 
202
- exec_section = cls.build_model_or_flow(
203
- job_config, job_namespace, exec_obj,
300
+ exec_section = self.build_model_or_flow(
301
+ exec_namespace, job_def, exec_obj,
204
302
  explicit_deps=[job_push_id])
205
303
 
206
- output_section = cls.build_job_outputs(
207
- job_config, job_namespace, required_outputs, provided_outputs,
304
+ output_section = self.build_job_outputs(
305
+ required_outputs, provided_outputs,
208
306
  explicit_deps=[job_push_id])
209
307
 
210
- main_section = cls._join_sections(params_section, input_section, exec_section, output_section)
308
+ main_section = self._join_sections(params_section, input_section, exec_section, output_section)
211
309
 
212
310
  # Build job-level metadata outputs
213
311
 
@@ -215,16 +313,14 @@ class GraphBuilder:
215
313
  nid for nid, n in main_section.nodes.items()
216
314
  if isinstance(n, DataResultNode))
217
315
 
218
- result_section = cls.build_job_results(
219
- job_config, job_namespace,
220
- result_spec, bundles=data_result_ids,
316
+ result_section = self.build_job_results(
317
+ bundles=data_result_ids,
221
318
  explicit_deps=[job_push_id, *main_section.must_run])
222
319
 
223
- return cls._join_sections(main_section, result_section)
320
+ return self._join_sections(main_section, result_section)
224
321
 
225
- @classmethod
226
322
  def build_job_parameters(
227
- cls, job_namespace: NodeNamespace,
323
+ self,
228
324
  required_params: tp.Dict[str, meta.ModelParameter],
229
325
  supplied_params: tp.Dict[str, meta.Value],
230
326
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
@@ -240,18 +336,18 @@ class GraphBuilder:
240
336
  if param_schema.defaultValue is not None:
241
337
  param_def = param_schema.defaultValue
242
338
  else:
243
- raise _ex.EJobValidation(f"Missing required parameter: [{param_name}]")
339
+ self._error(_ex.EJobValidation(f"Missing required parameter: [{param_name}]"))
340
+ continue
244
341
 
245
- param_id = NodeId(param_name, job_namespace, meta.Value)
342
+ param_id = NodeId(param_name, self._job_namespace, meta.Value)
246
343
  param_node = StaticValueNode(param_id, param_def, explicit_deps=explicit_deps)
247
344
 
248
345
  nodes[param_id] = param_node
249
346
 
250
347
  return GraphSection(nodes, outputs=set(nodes.keys()), must_run=list(nodes.keys()))
251
348
 
252
- @classmethod
253
349
  def build_job_inputs(
254
- cls, job_config: config.JobConfig, job_namespace: NodeNamespace,
350
+ self,
255
351
  required_inputs: tp.Dict[str, meta.ModelInputSchema],
256
352
  supplied_inputs: tp.Dict[str, meta.TagSelector],
257
353
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
@@ -267,20 +363,21 @@ class GraphBuilder:
267
363
 
268
364
  if data_selector is None:
269
365
  if input_schema.optional:
270
- data_view_id = NodeId.of(input_name, job_namespace, _data.DataView)
366
+ data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
271
367
  nodes[data_view_id] = StaticValueNode(data_view_id, _data.DataView.create_empty())
272
368
  outputs.add(data_view_id)
273
369
  continue
274
370
  else:
275
- raise _ex.EJobValidation(f"Missing required input: [{input_name}]")
371
+ self._error(_ex.EJobValidation(f"Missing required input: [{input_name}]"))
372
+ continue
276
373
 
277
374
  # Build a data spec using metadata from the job config
278
375
  # For now we are always loading the root part, snap 0, delta 0
279
- data_def = _util.get_job_resource(data_selector, job_config).data
280
- storage_def = _util.get_job_resource(data_def.storageId, job_config).storage
376
+ data_def = _util.get_job_resource(data_selector, self._job_config).data
377
+ storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
281
378
 
282
379
  if data_def.schemaId:
283
- schema_def = _util.get_job_resource(data_def.schemaId, job_config).schema
380
+ schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
284
381
  else:
285
382
  schema_def = data_def.schema
286
383
 
@@ -289,16 +386,16 @@ class GraphBuilder:
289
386
  data_spec = _data.DataSpec(data_item, data_def, storage_def, schema_def)
290
387
 
291
388
  # Data spec node is static, using the assembled data spec
292
- data_spec_id = NodeId.of(f"{input_name}:SPEC", job_namespace, _data.DataSpec)
389
+ data_spec_id = NodeId.of(f"{input_name}:SPEC", self._job_namespace, _data.DataSpec)
293
390
  data_spec_node = StaticValueNode(data_spec_id, data_spec, explicit_deps=explicit_deps)
294
391
 
295
392
  # Physical load of data items from disk
296
393
  # Currently one item per input, since inputs are single part/delta
297
- data_load_id = NodeId.of(f"{input_name}:LOAD", job_namespace, _data.DataItem)
394
+ data_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
298
395
  data_load_node = LoadDataNode(data_load_id, data_spec_id, explicit_deps=explicit_deps)
299
396
 
300
397
  # Input views assembled by mapping one root part to each view
301
- data_view_id = NodeId.of(input_name, job_namespace, _data.DataView)
398
+ data_view_id = NodeId.of(input_name, self._job_namespace, _data.DataView)
302
399
  data_view_node = DataViewNode(data_view_id, schema_def, data_load_id)
303
400
 
304
401
  nodes[data_spec_id] = data_spec_node
@@ -311,9 +408,8 @@ class GraphBuilder:
311
408
 
312
409
  return GraphSection(nodes, outputs=outputs, must_run=must_run)
313
410
 
314
- @classmethod
315
411
  def build_job_outputs(
316
- cls, job_config: config.JobConfig, job_namespace: NodeNamespace,
412
+ self,
317
413
  required_outputs: tp.Dict[str, meta.ModelOutputSchema],
318
414
  supplied_outputs: tp.Dict[str, meta.TagSelector],
319
415
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
@@ -329,25 +425,27 @@ class GraphBuilder:
329
425
  if data_selector is None:
330
426
  if output_schema.optional:
331
427
  optional_info = "(configuration is required for all optional outputs, in case they are produced)"
332
- raise _ex.EJobValidation(f"Missing optional output: [{output_name}] {optional_info}")
428
+ self._error(_ex.EJobValidation(f"Missing optional output: [{output_name}] {optional_info}"))
429
+ continue
333
430
  else:
334
- raise _ex.EJobValidation(f"Missing required output: [{output_name}]")
431
+ self._error(_ex.EJobValidation(f"Missing required output: [{output_name}]"))
432
+ continue
335
433
 
336
434
  # Output data view must already exist in the namespace
337
- data_view_id = NodeId.of(output_name, job_namespace, _data.DataView)
338
- data_spec_id = NodeId.of(f"{output_name}:SPEC", job_namespace, _data.DataSpec)
435
+ data_view_id = NodeId.of(output_name, self._job_namespace, _data.DataView)
436
+ data_spec_id = NodeId.of(f"{output_name}:SPEC", self._job_namespace, _data.DataSpec)
339
437
 
340
- data_obj = _util.get_job_resource(data_selector, job_config, optional=True)
438
+ data_obj = _util.get_job_resource(data_selector, self._job_config, optional=True)
341
439
 
342
440
  if data_obj is not None:
343
441
 
344
442
  # If data def for the output has been built in advance, use a static data spec
345
443
 
346
444
  data_def = data_obj.data
347
- storage_def = _util.get_job_resource(data_def.storageId, job_config).storage
445
+ storage_def = _util.get_job_resource(data_def.storageId, self._job_config).storage
348
446
 
349
447
  if data_def.schemaId:
350
- schema_def = _util.get_job_resource(data_def.schemaId, job_config).schema
448
+ schema_def = _util.get_job_resource(data_def.schemaId, self._job_config).schema
351
449
  else:
352
450
  schema_def = data_def.schema
353
451
 
@@ -366,28 +464,28 @@ class GraphBuilder:
366
464
  # Dynamic data def will always use an embedded schema (this is no ID for an external schema)
367
465
 
368
466
  data_key = output_name + ":DATA"
369
- data_id = job_config.resultMapping[data_key]
467
+ data_id = self._job_config.resultMapping[data_key]
370
468
  storage_key = output_name + ":STORAGE"
371
- storage_id = job_config.resultMapping[storage_key]
469
+ storage_id = self._job_config.resultMapping[storage_key]
372
470
 
373
471
  data_spec_node = DynamicDataSpecNode(
374
- data_spec_id, data_view_id,
375
- data_id, storage_id,
376
- prior_data_spec=None,
377
- explicit_deps=explicit_deps)
472
+ data_spec_id, data_view_id,
473
+ data_id, storage_id,
474
+ prior_data_spec=None,
475
+ explicit_deps=explicit_deps)
378
476
 
379
477
  output_data_key = _util.object_key(data_id)
380
478
  output_storage_key = _util.object_key(storage_id)
381
479
 
382
480
  # Map one data item from each view, since outputs are single part/delta
383
- data_item_id = NodeId(f"{output_name}:ITEM", job_namespace, _data.DataItem)
481
+ data_item_id = NodeId(f"{output_name}:ITEM", self._job_namespace, _data.DataItem)
384
482
  data_item_node = DataItemNode(data_item_id, data_view_id)
385
483
 
386
484
  # Create a physical save operation for the data item
387
- data_save_id = NodeId.of(f"{output_name}:SAVE", job_namespace, None)
485
+ data_save_id = NodeId.of(f"{output_name}:SAVE", self._job_namespace, None)
388
486
  data_save_node = SaveDataNode(data_save_id, data_spec_id, data_item_id)
389
487
 
390
- data_result_id = NodeId.of(f"{output_name}:RESULT", job_namespace, ObjectBundle)
488
+ data_result_id = NodeId.of(f"{output_name}:RESULT", self._job_namespace, ObjectBundle)
391
489
  data_result_node = DataResultNode(
392
490
  data_result_id, output_name,
393
491
  data_item_id, data_spec_id, data_save_id,
@@ -406,6 +504,9 @@ class GraphBuilder:
406
504
  @classmethod
407
505
  def build_runtime_outputs(cls, output_names: tp.List[str], job_namespace: NodeNamespace):
408
506
 
507
+ # This method is called dynamically during job execution
508
+ # So it cannot use stateful information like self._job_config or self._job_namespace
509
+
409
510
  # TODO: Factor out common logic with regular job outputs (including static / dynamic)
410
511
 
411
512
  nodes = {}
@@ -462,22 +563,21 @@ class GraphBuilder:
462
563
 
463
564
  return GraphSection(nodes, inputs=inputs, outputs={runtime_outputs_id})
464
565
 
465
- @classmethod
466
566
  def build_job_results(
467
- cls, job_config: cfg.JobConfig, job_namespace: NodeNamespace, result_spec: JobResultSpec,
567
+ self,
468
568
  objects: tp.Dict[str, NodeId[meta.ObjectDefinition]] = None,
469
569
  bundles: tp.List[NodeId[ObjectBundle]] = None,
470
570
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
471
571
  -> GraphSection:
472
572
 
473
- build_result_id = NodeId.of("trac_build_result", job_namespace, cfg.JobResult)
573
+ build_result_id = NodeId.of("trac_job_result", self._job_namespace, cfg.JobResult)
474
574
 
475
575
  if objects is not None:
476
576
 
477
577
  results_inputs = set(objects.values())
478
578
 
479
579
  build_result_node = BuildJobResultNode(
480
- build_result_id, job_config.jobId,
580
+ build_result_id, self._job_config.jobId,
481
581
  outputs = JobOutputs(objects=objects),
482
582
  explicit_deps=explicit_deps)
483
583
 
@@ -486,17 +586,16 @@ class GraphBuilder:
486
586
  results_inputs = set(bundles)
487
587
 
488
588
  build_result_node = BuildJobResultNode(
489
- build_result_id, job_config.jobId,
589
+ build_result_id, self._job_config.jobId,
490
590
  outputs = JobOutputs(bundles=bundles),
491
591
  explicit_deps=explicit_deps)
492
592
 
493
593
  else:
494
594
  raise _ex.EUnexpected()
495
595
 
496
- save_result_id = NodeId("trac_save_result", job_namespace)
497
- save_result_node = SaveJobResultNode(save_result_id, build_result_id, result_spec)
498
-
499
- if result_spec.save_result:
596
+ if self._result_spec.save_result:
597
+ save_result_id = NodeId("trac_save_result", self._job_namespace)
598
+ save_result_node = SaveJobResultNode(save_result_id, build_result_id, self._result_spec)
500
599
  result_nodes = {build_result_id: build_result_node, save_result_id: save_result_node}
501
600
  job_result_id = save_result_id
502
601
  else:
@@ -505,10 +604,9 @@ class GraphBuilder:
505
604
 
506
605
  return GraphSection(result_nodes, inputs=results_inputs, must_run=[job_result_id])
507
606
 
508
- @classmethod
509
607
  def build_model_or_flow_with_context(
510
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
511
- model_or_flow_name: str, model_or_flow: meta.ObjectDefinition,
608
+ self, namespace: NodeNamespace, model_or_flow_name: str,
609
+ job_def: meta.JobDefinition, model_or_flow: meta.ObjectDefinition,
512
610
  input_mapping: tp.Dict[str, NodeId], output_mapping: tp.Dict[str, NodeId],
513
611
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
514
612
  -> GraphSection:
@@ -521,44 +619,45 @@ class GraphBuilder:
521
619
  # Execute in the sub-context by doing PUSH, EXEC, POP
522
620
  # Note that POP node must be in the sub namespace too
523
621
 
524
- push_section = cls.build_context_push(
622
+ push_section = self.build_context_push(
525
623
  sub_namespace, input_mapping,
526
624
  explicit_deps)
527
625
 
528
- exec_section = cls.build_model_or_flow(
529
- job_config, sub_namespace, model_or_flow,
626
+ exec_section = self.build_model_or_flow(
627
+ sub_namespace, job_def, model_or_flow,
530
628
  explicit_deps=push_section.must_run)
531
629
 
532
- pop_section = cls.build_context_pop(
630
+ pop_section = self.build_context_pop(
533
631
  sub_namespace, output_mapping,
534
632
  explicit_deps=exec_section.must_run)
535
633
 
536
- return cls._join_sections(push_section, exec_section, pop_section)
634
+ return self._join_sections(push_section, exec_section, pop_section)
537
635
 
538
- @classmethod
539
636
  def build_model_or_flow(
540
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
637
+ self, namespace: NodeNamespace,
638
+ job_def: meta.JobDefinition,
541
639
  model_or_flow: meta.ObjectDefinition,
542
640
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
543
641
  -> GraphSection:
544
642
 
545
643
  if model_or_flow.objectType == meta.ObjectType.MODEL:
546
- return cls.build_model(job_config, namespace, model_or_flow.model, explicit_deps)
644
+ return self.build_model(namespace, job_def, model_or_flow.model, explicit_deps)
547
645
 
548
646
  elif model_or_flow.objectType == meta.ObjectType.FLOW:
549
- return cls.build_flow(job_config, namespace, model_or_flow.flow)
647
+ return self.build_flow(namespace, job_def, model_or_flow.flow)
550
648
 
551
649
  else:
552
- raise _ex.EConfigParse("Invalid job config given to the execution engine")
650
+ message = f"Invalid job config, expected model or flow, got [{model_or_flow.objectType}]"
651
+ self._error(_ex.EJobValidation(message))
553
652
 
554
- @classmethod
555
653
  def build_model(
556
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
654
+ self, namespace: NodeNamespace,
655
+ job_def: meta.JobDefinition,
557
656
  model_def: meta.ModelDefinition,
558
657
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
559
658
  -> GraphSection:
560
659
 
561
- cls.check_model_type(job_config, model_def)
660
+ self.check_model_type(job_def, model_def)
562
661
 
563
662
  def param_id(node_name):
564
663
  return NodeId(node_name, namespace, meta.Value)
@@ -572,10 +671,10 @@ class GraphBuilder:
572
671
  output_ids = set(map(data_id, model_def.outputs))
573
672
 
574
673
  # Set up storage access for import / export data jobs
575
- if job_config.job.jobType == meta.JobType.IMPORT_DATA:
576
- storage_access = job_config.job.importData.storageAccess
577
- elif job_config.job.jobType == meta.JobType.EXPORT_DATA:
578
- storage_access = job_config.job.exportData.storageAccess
674
+ if job_def.jobType == meta.JobType.IMPORT_DATA:
675
+ storage_access = job_def.importData.storageAccess
676
+ elif job_def.jobType == meta.JobType.EXPORT_DATA:
677
+ storage_access = job_def.exportData.storageAccess
579
678
  else:
580
679
  storage_access = None
581
680
 
@@ -615,9 +714,9 @@ class GraphBuilder:
615
714
  # Assemble a graph to include the model and its outputs
616
715
  return GraphSection(nodes, inputs={*parameter_ids, *input_ids}, outputs=output_ids, must_run=[model_result_id])
617
716
 
618
- @classmethod
619
717
  def build_flow(
620
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
718
+ self, namespace: NodeNamespace,
719
+ job_def: meta.JobDefinition,
621
720
  flow_def: meta.FlowDefinition,
622
721
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
623
722
  -> GraphSection:
@@ -650,11 +749,11 @@ class GraphBuilder:
650
749
 
651
750
  node_name, node = reachable_nodes.popitem()
652
751
 
653
- sub_section = cls.build_flow_node(
654
- job_config, namespace, target_edges,
752
+ sub_section = self.build_flow_node(
753
+ namespace, job_def, target_edges,
655
754
  node_name, node, explicit_deps)
656
755
 
657
- graph_section = cls._join_sections(graph_section, sub_section, allow_partial_inputs=True)
756
+ graph_section = self._join_sections(graph_section, sub_section, allow_partial_inputs=True)
658
757
 
659
758
  if node.nodeType != meta.FlowNodeType.OUTPUT_NODE:
660
759
 
@@ -674,20 +773,18 @@ class GraphBuilder:
674
773
  missing_targets = [edge.target for node in remaining_edges_by_target.values() for edge in node]
675
774
  missing_target_names = [f"{t.node}.{t.socket}" if t.socket else t.node for t in missing_targets]
676
775
  missing_nodes = list(map(lambda n: NodeId(n, namespace), missing_target_names))
677
- cls._invalid_graph_error(missing_nodes)
776
+ self._invalid_graph_error(missing_nodes)
678
777
 
679
778
  return graph_section
680
779
 
681
- @classmethod
682
780
  def build_flow_node(
683
- cls, job_config: config.JobConfig, namespace: NodeNamespace,
781
+ self, namespace: NodeNamespace,
782
+ job_def: meta.JobDefinition,
684
783
  target_edges: tp.Dict[meta.FlowSocket, meta.FlowEdge],
685
784
  node_name: str, node: meta.FlowNode,
686
785
  explicit_deps: tp.Optional[tp.List[NodeId]] = None) \
687
786
  -> GraphSection:
688
787
 
689
- flow_job = job_config.job.runFlow
690
-
691
788
  def socket_key(socket):
692
789
  return f"{socket.node}.{socket.socket}" if socket.socket else socket.node
693
790
 
@@ -700,7 +797,7 @@ class GraphBuilder:
700
797
  edge = target_edges.get(socket)
701
798
  # Report missing edges as a job consistency error (this might happen sometimes in dev mode)
702
799
  if edge is None:
703
- raise _ex.EJobValidation(f"Inconsistent flow: Socket [{socket}] is not connected")
800
+ self._error(_ex.EJobValidation(f"Inconsistent flow: Socket [{socket}] is not connected"))
704
801
  return socket_id(edge.source.node, edge.source.socket, result_type)
705
802
 
706
803
  if node.nodeType == meta.FlowNodeType.PARAMETER_NODE:
@@ -723,27 +820,27 @@ class GraphBuilder:
723
820
  push_mapping = {**input_mapping, **param_mapping}
724
821
  pop_mapping = output_mapping
725
822
 
726
- model_selector = flow_job.models.get(node_name)
727
- model_obj = _util.get_job_resource(model_selector, job_config)
823
+ model_selector = job_def.runFlow.models.get(node_name)
824
+ model_obj = _util.get_job_resource(model_selector, self._job_config)
728
825
 
729
826
  # Missing models in the job config is a job consistency error
730
827
  if model_obj is None or model_obj.objectType != meta.ObjectType.MODEL:
731
- raise _ex.EJobValidation(f"No model was provided for flow node [{node_name}]")
828
+ self._error(_ex.EJobValidation(f"No model was provided for flow node [{node_name}]"))
732
829
 
733
830
  # Explicit check for model compatibility - report an error now, do not try build_model()
734
- cls.check_model_compatibility(model_selector, model_obj.model, node_name, node)
735
- cls.check_model_type(job_config, model_obj.model)
831
+ self.check_model_compatibility(model_selector, model_obj.model, node_name, node)
832
+ self.check_model_type(job_def, model_obj.model)
736
833
 
737
- return cls.build_model_or_flow_with_context(
738
- job_config, namespace, node_name, model_obj,
739
- push_mapping, pop_mapping, explicit_deps)
834
+ return self.build_model_or_flow_with_context(
835
+ namespace, node_name,
836
+ job_def, model_obj,
837
+ push_mapping, pop_mapping,
838
+ explicit_deps)
740
839
 
741
- # Missing / invalid node type - should be caught in static validation
742
- raise _ex.ETracInternal(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]")
840
+ self._error(_ex.EJobValidation(f"Flow node [{node_name}] has invalid node type [{node.nodeType}]"))
743
841
 
744
- @classmethod
745
842
  def check_model_compatibility(
746
- cls, model_selector: meta.TagSelector,
843
+ self, model_selector: meta.TagSelector,
747
844
  model_def: meta.ModelDefinition, node_name: str, flow_node: meta.FlowNode):
748
845
 
749
846
  model_params = list(sorted(model_def.parameters.keys()))
@@ -756,22 +853,21 @@ class GraphBuilder:
756
853
 
757
854
  if model_params != node_params or model_inputs != node_inputs or model_outputs != node_outputs:
758
855
  model_key = _util.object_key(model_selector)
759
- raise _ex.EJobValidation(f"Incompatible model for flow node [{node_name}] (Model: [{model_key}])")
856
+ self._error(_ex.EJobValidation(f"Incompatible model for flow node [{node_name}] (Model: [{model_key}])"))
760
857
 
761
- @classmethod
762
- def check_model_type(cls, job_config: config.JobConfig, model_def: meta.ModelDefinition):
858
+ def check_model_type(self, job_def: meta.JobDefinition, model_def: meta.ModelDefinition):
763
859
 
764
- if job_config.job.jobType == meta.JobType.IMPORT_DATA:
860
+ if job_def.jobType == meta.JobType.IMPORT_DATA:
765
861
  allowed_model_types = [meta.ModelType.DATA_IMPORT_MODEL]
766
- elif job_config.job.jobType == meta.JobType.EXPORT_DATA:
862
+ elif job_def.jobType == meta.JobType.EXPORT_DATA:
767
863
  allowed_model_types = [meta.ModelType.DATA_EXPORT_MODEL]
768
864
  else:
769
865
  allowed_model_types = [meta.ModelType.STANDARD_MODEL]
770
866
 
771
867
  if model_def.modelType not in allowed_model_types:
772
- job_type = job_config.job.jobType.name
868
+ job_type = job_def.jobType.name
773
869
  model_type = model_def.modelType.name
774
- raise _ex.EJobValidation(f"Job type [{job_type}] cannot use model type [{model_type}]")
870
+ self._error(_ex.EJobValidation(f"Job type [{job_type}] cannot use model type [{model_type}]"))
775
871
 
776
872
  @staticmethod
777
873
  def build_context_push(
@@ -833,8 +929,7 @@ class GraphBuilder:
833
929
  outputs={*pop_mapping.values()},
834
930
  must_run=[pop_id])
835
931
 
836
- @classmethod
837
- def _join_sections(cls, *sections: GraphSection, allow_partial_inputs: bool = False):
932
+ def _join_sections(self, *sections: GraphSection, allow_partial_inputs: bool = False):
838
933
 
839
934
  n_sections = len(sections)
840
935
  first_section = sections[0]
@@ -856,7 +951,7 @@ class GraphBuilder:
856
951
  if allow_partial_inputs:
857
952
  inputs.update(requirements_not_met)
858
953
  else:
859
- cls._invalid_graph_error(requirements_not_met)
954
+ self._invalid_graph_error(requirements_not_met)
860
955
 
861
956
  nodes.update(current_section.nodes)
862
957
 
@@ -865,13 +960,12 @@ class GraphBuilder:
865
960
 
866
961
  return GraphSection(nodes, inputs, last_section.outputs, must_run)
867
962
 
868
- @classmethod
869
- def _invalid_graph_error(cls, missing_dependencies: tp.Iterable[NodeId]):
963
+ def _invalid_graph_error(self, missing_dependencies: tp.Iterable[NodeId]):
870
964
 
871
- missing_ids = ", ".join(map(cls._missing_item_display_name, missing_dependencies))
872
- message = f"Invalid job config: The execution graph has unsatisfied dependencies: [{missing_ids}]"
965
+ missing_ids = ", ".join(map(self._missing_item_display_name, missing_dependencies))
966
+ message = f"The execution graph has unsatisfied dependencies: [{missing_ids}]"
873
967
 
874
- raise _ex.EJobValidation(message)
968
+ self._error(_ex.EJobValidation(message))
875
969
 
876
970
  @classmethod
877
971
  def _missing_item_display_name(cls, node_id: NodeId):
@@ -886,3 +980,7 @@ class GraphBuilder:
886
980
  return node_id.name
887
981
  else:
888
982
  return f"{node_id.name} / {', '.join(components[:-1])}"
983
+
984
+ def _error(self, error: Exception):
985
+
986
+ self._errors.append(error)