tracdap-runtime 0.6.3__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_exec/context.py +572 -112
- tracdap/rt/_exec/dev_mode.py +166 -97
- tracdap/rt/_exec/engine.py +120 -9
- tracdap/rt/_exec/functions.py +137 -35
- tracdap/rt/_exec/graph.py +38 -13
- tracdap/rt/_exec/graph_builder.py +120 -9
- tracdap/rt/_impl/data.py +183 -52
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +18 -18
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +74 -30
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +120 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +20 -18
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +22 -6
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +29 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +16 -0
- tracdap/rt/_impl/models.py +8 -0
- tracdap/rt/_impl/static_api.py +42 -10
- tracdap/rt/_impl/storage.py +37 -25
- tracdap/rt/_impl/validation.py +113 -11
- tracdap/rt/_plugins/repo_git.py +1 -1
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/experimental.py +220 -0
- tracdap/rt/api/hook.py +6 -4
- tracdap/rt/api/model_api.py +98 -13
- tracdap/rt/api/static_api.py +14 -6
- tracdap/rt/config/__init__.py +2 -2
- tracdap/rt/config/common.py +23 -17
- tracdap/rt/config/job.py +2 -2
- tracdap/rt/config/platform.py +25 -25
- tracdap/rt/config/result.py +2 -2
- tracdap/rt/config/runtime.py +3 -3
- tracdap/rt/launch/cli.py +7 -4
- tracdap/rt/launch/launch.py +19 -3
- tracdap/rt/metadata/__init__.py +25 -20
- tracdap/rt/metadata/common.py +2 -2
- tracdap/rt/metadata/custom.py +3 -3
- tracdap/rt/metadata/data.py +12 -12
- tracdap/rt/metadata/file.py +6 -6
- tracdap/rt/metadata/flow.py +6 -6
- tracdap/rt/metadata/job.py +62 -8
- tracdap/rt/metadata/model.py +33 -11
- tracdap/rt/metadata/object_id.py +8 -8
- tracdap/rt/metadata/resource.py +24 -0
- tracdap/rt/metadata/search.py +5 -5
- tracdap/rt/metadata/stoarge.py +6 -6
- tracdap/rt/metadata/tag.py +1 -1
- tracdap/rt/metadata/tag_update.py +1 -1
- tracdap/rt/metadata/type.py +4 -4
- {tracdap_runtime-0.6.3.dist-info → tracdap_runtime-0.6.5.dist-info}/METADATA +3 -1
- {tracdap_runtime-0.6.3.dist-info → tracdap_runtime-0.6.5.dist-info}/RECORD +52 -48
- {tracdap_runtime-0.6.3.dist-info → tracdap_runtime-0.6.5.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.6.3.dist-info → tracdap_runtime-0.6.5.dist-info}/WHEEL +0 -0
- {tracdap_runtime-0.6.3.dist-info → tracdap_runtime-0.6.5.dist-info}/top_level.txt +0 -0
tracdap/rt/_exec/dev_mode.py
CHANGED
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from __future__ import annotations
-
 import re
 import typing as tp
 import copy
@@ -31,12 +29,12 @@ import tracdap.rt._impl.util as _util  # noqa


 DEV_MODE_JOB_CONFIG = [
-    re.compile(r"job
-    re.compile(r"job
-    re.compile(r"job
-    re.compile(r"job
-    re.compile(r"job
-    re.compile(r"job
+    re.compile(r"job\.\w+\.parameters\.\w+"),
+    re.compile(r"job\.\w+\.inputs\.\w+"),
+    re.compile(r"job\.\w+\.outputs\.\w+"),
+    re.compile(r"job\.\w+\.models\.\w+"),
+    re.compile(r"job\.\w+\.model"),
+    re.compile(r"job\.\w+\.flow")]

 DEV_MODE_SYS_CONFIG = []
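These patterns control which values in a dev-mode job config are eligible for translation. The new patterns use a generic `job.\w+` segment in place of hard-coded job types, so translation now applies to any job type (runModel, runFlow, importModel, and the new importData/exportData). A minimal sketch of the matching behaviour, assuming full-string matching and using hypothetical config paths:

```python
import re

# Same patterns as the new DEV_MODE_JOB_CONFIG above
patterns = [
    re.compile(r"job\.\w+\.parameters\.\w+"),
    re.compile(r"job\.\w+\.inputs\.\w+"),
    re.compile(r"job\.\w+\.outputs\.\w+"),
    re.compile(r"job\.\w+\.models\.\w+"),
    re.compile(r"job\.\w+\.model"),
    re.compile(r"job\.\w+\.flow")]

# Hypothetical config paths - any job type segment after "job." now matches
for path in ["job.runModel.parameters.eur_usd_rate",
             "job.runFlow.inputs.customer_loans",
             "job.importModel.model",
             "job.exportData.outputs.report"]:
    assert any(p.fullmatch(path) for p in patterns), path
```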
|
@@ -56,7 +54,7 @@ class DevModeTranslator:
         sys_config.storage = _cfg.StorageConfig()

         sys_config = cls._add_integrated_repo(sys_config)
-        sys_config = cls.
+        sys_config = cls._process_storage(sys_config, config_mgr)

         return sys_config

@@ -72,24 +70,23 @@ class DevModeTranslator:

         cls._log.info(f"Applying dev mode config translation to job config")

-
+        # Protobuf semantics for a blank jobId should be an object, but objectId will be an empty string
+        if not job_config.jobId or not job_config.jobId.objectId:
             job_config = cls._process_job_id(job_config)

         if job_config.job.jobType is None or job_config.job.jobType == _meta.JobType.JOB_TYPE_NOT_SET:
             job_config = cls._process_job_type(job_config)

         # Load and populate any models provided as a Python class or class name
-
-        job_config = cls._process_models(sys_config, job_config, scratch_dir, model_class)
+        job_config = cls._process_models(sys_config, job_config, scratch_dir, model_class)

         # Fow flows, load external flow definitions then perform auto-wiring and type inference
         if job_config.job.jobType == _meta.JobType.RUN_FLOW:
             job_config = cls._process_flow_definition(job_config, config_mgr)

-        #
-
-
-        job_config = cls._process_inputs_and_outputs(sys_config, job_config)
+        # Apply processing to the parameters, inputs and outputs
+        job_config = cls._process_parameters(job_config)
+        job_config = cls._process_inputs_and_outputs(sys_config, job_config)

         return job_config

@@ -107,51 +104,60 @@ class DevModeTranslator:
         return sys_config

     @classmethod
-    def
+    def _process_storage(
             cls, sys_config: _cfg.RuntimeConfig,
             config_mgr: _cfg_p.ConfigManager):

         storage_config = copy.deepcopy(sys_config.storage)

         for bucket_key, bucket_config in storage_config.buckets.items():
+            storage_config.buckets[bucket_key] = cls._resolve_storage_location(
+                bucket_key, bucket_config, config_mgr)
+
+        for bucket_key, bucket_config in storage_config.external.items():
+            storage_config.external[bucket_key] = cls._resolve_storage_location(
+                bucket_key, bucket_config, config_mgr)

-
-
+        sys_config = copy.copy(sys_config)
+        sys_config.storage = storage_config

-
-
+        return sys_config
+
+    @classmethod
+    def _resolve_storage_location(cls, bucket_key, bucket_config, config_mgr: _cfg_p.ConfigManager):

-
+        if bucket_config.protocol != "LOCAL":
+            return bucket_config

-
-
+        if "rootPath" not in bucket_config.properties:
+            return bucket_config

-
+        root_path = pathlib.Path(bucket_config.properties["rootPath"])

-
-
-            absolute_path = sys_config_path.joinpath(root_path).resolve()
-            if absolute_path.exists():
-                cls._log.info(f"Resolved [{root_path}] -> [{absolute_path}]")
-                bucket_config.properties["rootPath"] = str(absolute_path)
-                continue
+        if root_path.is_absolute():
+            return bucket_config

-
-            absolute_path = cwd.joinpath(root_path).resolve()
+        cls._log.info(f"Resolving relative path for [{bucket_key}] local storage...")

+        sys_config_path = config_mgr.config_dir_path()
+        if sys_config_path is not None:
+            absolute_path = sys_config_path.joinpath(root_path).resolve()
             if absolute_path.exists():
                 cls._log.info(f"Resolved [{root_path}] -> [{absolute_path}]")
                 bucket_config.properties["rootPath"] = str(absolute_path)
-
+                return bucket_config

-
-
-            raise _ex.EConfigParse(msg)
+        cwd = pathlib.Path.cwd()
+        absolute_path = cwd.joinpath(root_path).resolve()

-
-
+        if absolute_path.exists():
+            cls._log.info(f"Resolved [{root_path}] -> [{absolute_path}]")
+            bucket_config.properties["rootPath"] = str(absolute_path)
+            return bucket_config

-
+        msg = f"Failed to resolve relative storage path [{root_path}]"
+        cls._log.error(msg)
+        raise _ex.EConfigParse(msg)

     @classmethod
     def _add_job_resource(
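`_process_storage` now covers both `buckets` and the new `external` storage locations, with the path logic factored into `_resolve_storage_location`, which only rewrites relative `rootPath` properties on LOCAL storage. A minimal sketch of the resolution order, assuming the sys config directory takes precedence over the working directory (the `resolve_root` helper is hypothetical, standing in for the real method):

```python
import pathlib
import typing as tp

def resolve_root(root_path: str, config_dir: tp.Optional[pathlib.Path]) -> pathlib.Path:
    path = pathlib.Path(root_path)
    if path.is_absolute():
        return path  # absolute paths pass through untouched
    if config_dir is not None:
        candidate = config_dir.joinpath(path).resolve()
        if candidate.exists():
            return candidate  # first try relative to the sys config file
    candidate = pathlib.Path.cwd().joinpath(path).resolve()
    if candidate.exists():
        return candidate  # then fall back to the current working directory
    raise ValueError(f"Failed to resolve relative storage path [{root_path}]")
```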
@@ -188,6 +194,12 @@ class DevModeTranslator:
         elif job_config.job.importModel is not None:
             job_type = _meta.JobType.IMPORT_MODEL

+        elif job_config.job.importData is not None:
+            job_type = _meta.JobType.IMPORT_DATA
+
+        elif job_config.job.exportData is not None:
+            job_type = _meta.JobType.EXPORT_DATA
+
         else:
             cls._log.error("Could not infer job type")
             raise _ex.EConfigParse("Could not infer job type")
@@ -202,6 +214,26 @@ class DevModeTranslator:

         return job_config

+    @classmethod
+    def _get_job_detail(cls, job_config: _cfg.JobConfig):
+
+        if job_config.job.jobType == _meta.JobType.RUN_MODEL:
+            return job_config.job.runModel
+
+        if job_config.job.jobType == _meta.JobType.RUN_FLOW:
+            return job_config.job.runFlow
+
+        if job_config.job.jobType == _meta.JobType.IMPORT_MODEL:
+            return job_config.job.importModel
+
+        if job_config.job.jobType == _meta.JobType.IMPORT_DATA:
+            return job_config.job.importData
+
+        if job_config.job.jobType == _meta.JobType.EXPORT_DATA:
+            return job_config.job.exportData
+
+        raise _ex.EConfigParse(f"Could not get job details for job type [{job_config.job.jobType}]")
+
     @classmethod
     def _process_models(
             cls,
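`_get_job_detail` gives the translator a single access point to the per-job-type detail object, and the downstream methods rely on a naming convention instead of job-type switches: single-model jobs expose a `model` field, multi-model jobs expose a `models` dict. A small illustration of that convention, using hypothetical stand-in classes rather than the real metadata types:

```python
import dataclasses as dc
import typing as tp

@dc.dataclass
class RunModelJob:                  # stand-in: single-model job detail
    model: str = "acme.models.MyModel"

@dc.dataclass
class RunFlowJob:                   # stand-in: multi-model job detail
    models: tp.Dict[str, str] = dc.field(default_factory=dict)

def describe(job_detail) -> str:
    if hasattr(job_detail, "model"):
        return f"single model: {job_detail.model}"
    if hasattr(job_detail, "models"):
        return f"{len(job_detail.models)} models: {', '.join(job_detail.models)}"
    return "no models required"

print(describe(RunModelJob()))
print(describe(RunFlowJob(models={"pnl": "acme.models.Pnl", "risk": "acme.models.Risk"})))
```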
@@ -214,41 +246,39 @@ class DevModeTranslator:
         model_loader = _models.ModelLoader(sys_config, scratch_dir)
         model_loader.create_scope("DEV_MODE_TRANSLATION")

-
-
-
-        job_config.job = copy.copy(job_config.job)
-        job_config.resources = copy.copy(job_config.resources)
-
-        if job_config.job.jobType == _meta.JobType.RUN_MODEL:
+        # This processing works on the assumption that job details follow a convention for addressing models
+        # Jobs requiring a single model have a field called "model"
+        # Jobs requiring multiple models have a field called "models", which is a dict

-
+        job_detail = cls._get_job_detail(job_config)

-
-
-            model_id, model_obj = cls._generate_model_for_class(model_loader, model_class)
-            job_config = cls._add_job_resource(job_config, model_id, model_obj)
-            job_config.job.runModel.model = _util.selector_for(model_id)
+        # If a model class is supplied in code, use that to generate the model def
+        if model_class is not None:

-            #
-
-
-            model_id, model_obj = cls._generate_model_for_entry_point(model_loader, model_detail)  # noqa
-            job_config = cls._add_job_resource(job_config, model_id, model_obj)
-            job_config.job.runModel.model = _util.selector_for(model_id)
+            # Passing a model class via launch_model() is only supported for job types with a single model
+            if not hasattr(job_detail, "model"):
+                raise _ex.EJobValidation(f"Job type [{job_config.job.jobType}] cannot be launched using launch_model()")

-
+            model_id, model_obj = cls._generate_model_for_class(model_loader, model_class)
+            job_detail.model = _util.selector_for(model_id)
+            job_config = cls._add_job_resource(job_config, model_id, model_obj)

-
-
+        # Otherwise look for models specified as a single string, and take that as the entry point
+        else:

-
+            # Jobs with a single model
+            if hasattr(job_detail, "model") and isinstance(job_detail.model, str):
+                model_id, model_obj = cls._generate_model_for_entry_point(model_loader, job_detail.model)  # noqa
+                job_detail.model = _util.selector_for(model_id)
+                job_config = cls._add_job_resource(job_config, model_id, model_obj)

-
-
-
-
-
+            # Jobs with multiple models
+            elif hasattr(job_detail, "models") and isinstance(job_detail.models, dict):
+                for model_key, model_detail in job_detail.models.items():
+                    if isinstance(model_detail, str):
+                        model_id, model_obj = cls._generate_model_for_entry_point(model_loader, model_detail)
+                        job_detail.models[model_key] = _util.selector_for(model_id)
+                        job_config = cls._add_job_resource(job_config, model_id, model_obj)

         model_loader.destroy_scope("DEV_MODE_TRANSLATION")

@@ -313,6 +343,9 @@ class DevModeTranslator:

         flow_def = config_mgr.load_config_object(flow_details, _meta.FlowDefinition)

+        # Validate models against the flow (this could move to _impl.validation and check prod jobs as well)
+        cls._check_models_for_flow(flow_def, job_config)
+
         # Auto-wiring and inference only applied to externally loaded flows for now
         flow_def = cls._autowire_flow(flow_def, job_config)
         flow_def = cls._apply_type_inference(flow_def, job_config)
@@ -331,6 +364,37 @@ class DevModeTranslator:

         return job_config

+    @classmethod
+    def _check_models_for_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
+
+        model_nodes = dict(filter(lambda n: n[1].nodeType == _meta.FlowNodeType.MODEL_NODE, flow.nodes.items()))
+
+        missing_models = list(filter(lambda m: m not in job_config.job.runFlow.models, model_nodes.keys()))
+        extra_models = list(filter(lambda m: m not in model_nodes, job_config.job.runFlow.models.keys()))
+
+        if any(missing_models):
+            error = f"Missing models in job definition: {', '.join(missing_models)}"
+            cls._log.error(error)
+            raise _ex.EJobValidation(error)
+
+        if any(extra_models):
+            error = f"Extra models in job definition: {', '.join(extra_models)}"
+            cls._log.error(error)
+            raise _ex.EJobValidation(error)
+
+        for model_name, model_node in model_nodes.items():
+
+            model_selector = job_config.job.runFlow.models[model_name]
+            model_obj = _util.get_job_resource(model_selector, job_config)
+
+            model_inputs = set(model_obj.model.inputs.keys())
+            model_outputs = set(model_obj.model.outputs.keys())
+
+            if model_inputs != set(model_node.inputs) or model_outputs != set(model_node.outputs):
+                error = f"The model supplied for [{model_name}] does not match the flow definition"
+                cls._log.error(error)
+                raise _ex.EJobValidation(error)
+
     @classmethod
     def _autowire_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):

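`_check_models_for_flow` enforces two invariants before wiring: the job must supply exactly the set of models named by the flow's MODEL_NODE entries, and each supplied model's inputs and outputs must match its node's sockets. A toy illustration of the set arithmetic involved, with hypothetical node and model names:

```python
# Flow nodes of type MODEL_NODE vs models supplied in the job (hypothetical names)
flow_model_nodes = {"pnl_model", "risk_model"}
job_models = {"pnl_model", "legacy_model"}

missing_models = flow_model_nodes - job_models   # {"risk_model"} -> EJobValidation
extra_models = job_models - flow_model_nodes     # {"legacy_model"} -> EJobValidation

# Per-model check: input/output names must match the flow node exactly
model_inputs, node_inputs = {"customer_loans"}, {"customer_loans", "fx_rates"}
if model_inputs != node_inputs:
    print("The model supplied for [pnl_model] does not match the flow definition")
```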
|
@@ -562,35 +626,34 @@ class DevModeTranslator:
     @classmethod
     def _process_parameters(cls, job_config: _cfg.JobConfig) -> _cfg.JobConfig:

-
-
-        job_details = job_config.job.runModel
-        model_key = _util.object_key(job_details.model)
-        model_or_flow = job_config.resources[model_key].model
+        # This relies on convention for naming properties across similar job types

-
+        job_detail = cls._get_job_detail(job_config)

-
-
+        if hasattr(job_detail, "model"):
+            model_key = _util.object_key(job_detail.model)
+            model_or_flow = job_config.resources[model_key].model
+        elif hasattr(job_detail, "flow"):
+            flow_key = _util.object_key(job_detail.flow)
             model_or_flow = job_config.resources[flow_key].flow
-
         else:
-
+            model_or_flow = None
+
+        if model_or_flow is not None:

-
-
+            param_specs = model_or_flow.parameters
+            raw_values = job_detail.parameters

-
-            job_details.parameters = cls._process_parameters_dict(param_specs, param_values)
+            job_detail.parameters = cls._process_parameters_dict(param_specs, raw_values)

         return job_config

     @classmethod
     def _process_parameters_dict(
             cls, param_specs: tp.Dict[str, _meta.ModelParameter],
-
+            raw_values: tp.Dict[str, _meta.Value]) -> tp.Dict[str, _meta.Value]:

-        unknown_params = list(filter(lambda p: p not in param_specs,
+        unknown_params = list(filter(lambda p: p not in param_specs, raw_values))

         if any(unknown_params):
             msg = f"Unknown parameters cannot be translated: [{', '.join(unknown_params)}]"
@@ -599,7 +662,7 @@ class DevModeTranslator:

         encoded_values = dict()

-        for p_name, p_value in
+        for p_name, p_value in raw_values.items():

             if isinstance(p_value, _meta.Value):
                 encoded_values[p_name] = p_value
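`_process_parameters` now resolves the parameter specs for any job type via `_get_job_detail`, and `_process_parameters_dict` normalises raw dev-mode values into metadata `Value` objects. A condensed sketch of the normalisation step, with a hypothetical `encode_value` standing in for the runtime's real type encoding:

```python
import typing as tp

class Value:                                  # stand-in for tracdap.rt.metadata.Value
    def __init__(self, native): self.native = native

def encode_value(native) -> Value:            # hypothetical encoder
    return Value(native)

def process_parameters_dict(
        param_specs: tp.Dict[str, object],
        raw_values: tp.Dict[str, object]) -> tp.Dict[str, Value]:

    unknown_params = [p for p in raw_values if p not in param_specs]
    if unknown_params:
        raise ValueError(f"Unknown parameters cannot be translated: [{', '.join(unknown_params)}]")

    encoded_values = dict()
    for p_name, p_value in raw_values.items():
        # Values that are already encoded pass through unchanged
        encoded_values[p_name] = p_value if isinstance(p_value, Value) else encode_value(p_value)
    return encoded_values
```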
@@ -617,27 +680,30 @@ class DevModeTranslator:
     @classmethod
     def _process_inputs_and_outputs(cls, sys_config: _cfg.RuntimeConfig, job_config: _cfg.JobConfig) -> _cfg.JobConfig:

-
-
-
+        job_detail = cls._get_job_detail(job_config)
+
+        if hasattr(job_detail, "model"):
+            model_obj = _util.get_job_resource(job_detail.model, job_config)
             required_inputs = model_obj.model.inputs
+            required_outputs = model_obj.model.outputs

-        elif
-
-            flow_obj = _util.get_job_resource(job_details.flow, job_config)
+        elif hasattr(job_detail, "flow"):
+            flow_obj = _util.get_job_resource(job_detail.flow, job_config)
             required_inputs = flow_obj.flow.inputs
+            required_outputs = flow_obj.flow.outputs

         else:
             return job_config

-        job_inputs =
-        job_outputs =
+        job_inputs = job_detail.inputs
+        job_outputs = job_detail.outputs
         job_resources = job_config.resources

         for input_key, input_value in job_inputs.items():
             if not (isinstance(input_value, str) and input_value in job_resources):

-
+                model_input = required_inputs[input_key]
+                input_schema = model_input.schema if model_input and not model_input.dynamic else None

                 input_id = cls._process_input_or_output(
                     sys_config, input_key, input_value, job_resources,
@@ -648,9 +714,12 @@ class DevModeTranslator:
         for output_key, output_value in job_outputs.items():
             if not (isinstance(output_value, str) and output_value in job_resources):

+                model_output = required_outputs[output_key]
+                output_schema = model_output.schema if model_output and not model_output.dynamic else None
+
                 output_id = cls._process_input_or_output(
                     sys_config, output_key, output_value, job_resources,
-                    new_unique_file=True, schema=
+                    new_unique_file=True, schema=output_schema)

                 job_outputs[output_key] = _util.selector_for(output_id)

@@ -768,7 +837,7 @@ class DevModeTranslator:
         if schema is not None:
             data_def.schema = schema
         else:
-            data_def.schema =
+            data_def.schema = None

         data_def.storageId = _meta.TagSelector(
             _meta.ObjectType.STORAGE, storage_id.objectId,
tracdap/rt/_exec/engine.py
CHANGED
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from __future__ import annotations
-
 import copy as cp
 import dataclasses as dc
 import enum
@@ -272,12 +270,13 @@ class JobProcessor(_actors.Actor):
         self.result_spec = result_spec
         self._models = models
         self._storage = storage
+        self._resolver = _func.FunctionResolver(models, storage)
         self._log = _util.logger_for_object(self)

     def on_start(self):
         self._log.info(f"Starting job [{self.job_key}]")
         self._models.create_scope(self.job_key)
-        self.actors().spawn(GraphBuilder(self.job_config, self.result_spec, self.
+        self.actors().spawn(GraphBuilder(self.job_config, self.result_spec, self._resolver))

     def on_stop(self):
         self._log.info(f"Cleaning up job [{self.job_key}]")
@@ -305,7 +304,7 @@ class JobProcessor(_actors.Actor):

     @_actors.Message
     def job_graph(self, graph: _EngineContext, root_id: NodeId):
-        self.actors().spawn(GraphProcessor(graph, root_id))
+        self.actors().spawn(GraphProcessor(graph, root_id, self._resolver))
         self.actors().stop(self.actors().sender)

     @_actors.Message
@@ -331,15 +330,14 @@ class GraphBuilder(_actors.Actor):
     def __init__(
             self, job_config: _cfg.JobConfig,
             result_spec: _graph.JobResultSpec,
-
-            storage: _storage.StorageManager):
+            resolver: _func.FunctionResolver):

         super().__init__()
         self.job_config = job_config
         self.result_spec = result_spec
         self.graph: tp.Optional[_EngineContext] = None

-        self._resolver =
+        self._resolver = resolver
         self._log = _util.logger_for_object(self)

     def on_start(self):
@@ -378,11 +376,12 @@ class GraphProcessor(_actors.Actor):
     Once all running nodes are stopped, an error is reported to the parent
     """

-    def __init__(self, graph: _EngineContext, root_id: NodeId):
+    def __init__(self, graph: _EngineContext, root_id: NodeId, resolver: _func.FunctionResolver):
         super().__init__()
         self.graph = graph
         self.root_id = root_id
         self.processors: tp.Dict[NodeId, _actors.ActorId] = dict()
+        self._resolver = resolver
         self._log = _util.logger_for_object(self)

     def on_start(self):
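These three hunks thread a single `FunctionResolver` from `JobProcessor` into both `GraphBuilder` and `GraphProcessor`, so nodes added to a running graph resolve their functions the same way as nodes built up front. A minimal sketch of the injection pattern, with simplified stand-in classes:

```python
class FunctionResolver:                        # simplified stand-in
    def __init__(self, models, storage):
        self._models, self._storage = models, storage
    def resolve_node(self, node):
        return lambda ctx, callback: None      # placeholder node function

class GraphBuilder:
    def __init__(self, resolver): self._resolver = resolver

class GraphProcessor:
    def __init__(self, resolver): self._resolver = resolver

class JobProcessor:
    def __init__(self, models, storage):
        # One resolver per job, shared by the builder and the processor
        self._resolver = FunctionResolver(models, storage)
    def spawn_children(self):
        return GraphBuilder(self._resolver), GraphProcessor(self._resolver)
```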
@@ -463,6 +462,62 @@ class GraphProcessor(_actors.Actor):
         # Job may have completed due to error propagation
         self.check_job_status(do_submit=False)

+    @_actors.Message
+    def update_graph(
+            self, requestor_id: NodeId,
+            new_nodes: tp.Dict[NodeId, _graph.Node],
+            new_deps: tp.Dict[NodeId, tp.List[_graph.Dependency]]):
+
+        new_graph = cp.copy(self.graph)
+        new_graph.nodes = cp.copy(new_graph.nodes)
+
+        # Attempt to insert a duplicate node is always an error
+        node_collision = list(filter(lambda nid: nid in self.graph.nodes, new_nodes))
+
+        # Only allow adding deps to pending nodes for now (adding deps to active nodes will require more work)
+        dep_collision = list(filter(lambda nid: nid not in self.graph.pending_nodes, new_deps))
+
+        dep_invalid = list(filter(
+            lambda dds: any(filter(lambda dd: dd.node_id not in new_nodes, dds)),
+            new_deps.values()))
+
+        if any(node_collision) or any(dep_collision) or any(dep_invalid):
+
+            self._log.error(f"Node collision during graph update (requested by {requestor_id})")
+            self._log.error(f"Duplicate node IDs: {node_collision or 'None'}")
+            self._log.error(f"Dependency updates for dead nodes: {dep_collision or 'None'}")
+            self._log.error(f"Dependencies added for existing nodes: {dep_invalid or 'None'}")
+
+            # Set an error on the node, and wait for it to complete normally
+            # The error will be picked up when the result is recorded
+            # If dependencies are added for an active node, more signalling will be needed
+            requestor = cp.copy(new_graph.nodes[requestor_id])
+            requestor.error = _ex.ETracInternal("Node collision during graph update")
+            new_graph.nodes[requestor_id] = requestor
+
+            return
+
+        new_graph.pending_nodes = cp.copy(new_graph.pending_nodes)
+
+        for node_id, node in new_nodes.items():
+            GraphLogger.log_node_add(node)
+            node_func = self._resolver.resolve_node(node)
+            new_node = _EngineNode(node, {}, function=node_func)
+            new_graph.nodes[node_id] = new_node
+            new_graph.pending_nodes.add(node_id)
+
+        for node_id, deps in new_deps.items():
+            engine_node = cp.copy(new_graph.nodes[node_id])
+            engine_node.dependencies = cp.copy(engine_node.dependencies)
+            for dep in deps:
+                GraphLogger.log_dependency_add(node_id, dep.node_id)
+                engine_node.dependencies[dep.node_id] = dep.dependency_type
+            new_graph.nodes[node_id] = engine_node
+
+        self.graph = new_graph
+
+        self.actors().send(self.actors().id, "submit_viable_nodes")
+
     @classmethod
     def _is_required_node(cls, node: _EngineNode, graph: _EngineContext):

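`update_graph` is the engine-side half of the new dynamic graph mechanism: it validates the requested additions, then applies them copy-on-write so a rejected update never corrupts the live graph. A reduced sketch of that pattern, using plain dicts and sets in place of the real engine types:

```python
import copy as cp

class Graph:                                   # simplified stand-in for the engine context
    def __init__(self, nodes, pending_nodes):
        self.nodes = nodes
        self.pending_nodes = pending_nodes

def apply_update(graph: Graph, new_nodes: dict, new_deps: dict) -> Graph:

    # Duplicate node IDs are always an error; deps may only target pending nodes
    node_collision = [nid for nid in new_nodes if nid in graph.nodes]
    dep_collision = [nid for nid in new_deps if nid not in graph.pending_nodes]

    if node_collision or dep_collision:
        raise RuntimeError("Node collision during graph update")

    # Copy-on-write: the original graph object is never mutated,
    # so a failure part-way through leaves the running job intact
    new_graph = cp.copy(graph)
    new_graph.nodes = {**graph.nodes, **new_nodes}
    new_graph.pending_nodes = graph.pending_nodes | set(new_nodes)
    return new_graph
```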
|
@@ -654,8 +709,15 @@ class NodeProcessor(_actors.Actor):

         NodeLogger.log_node_start(self.node)

+        # Context contains only node states available when the context is set up
         ctx = NodeContextImpl(self.graph.nodes)
-
+
+        # Callback remains valid because it only lives inside the call stack for this message
+        callback = NodeCallbackImpl(self.actors(), self.node_id)
+
+        # Execute the node function
+        result = self.node.function(ctx, callback)
+
         self._check_result_type(result)

         NodeLogger.log_node_succeeded(self.node)
@@ -730,6 +792,37 @@ class DataNodeProcessor(NodeProcessor):
         super().__init__(graph, node_id, node)


+class GraphLogger:
+
+    """
+    Log the activity of the GraphProcessor
+    """
+
+    _log = _util.logger_for_class(GraphProcessor)
+
+    @classmethod
+    def log_node_add(cls, node: _graph.Node):
+
+        node_name = node.id.name
+        namespace = node.id.namespace
+
+        cls._log.info(f"ADD {cls._func_type(node)} [{node_name}] / {namespace}")
+
+    @classmethod
+    def log_dependency_add(cls, node_id: NodeId, dep_id: NodeId):
+
+        if node_id.namespace == dep_id.namespace:
+            cls._log.info(f"ADD DEPENDENCY [{node_id.name}] -> [{dep_id.name}] / {node_id.namespace}")
+        else:
+            cls._log.info(f"ADD DEPENDENCY [{node_id.name}] / {node_id.namespace} -> [{dep_id.name}] / {dep_id.namespace}")
+
+    @classmethod
+    def _func_type(cls, node: _graph.Node):
+
+        func_type = type(node)
+        return func_type.__name__[:-4]
+
+
 class NodeLogger:

     """
@@ -912,3 +1005,21 @@ class NodeContextImpl(_func.NodeContext):
         for node_id, node in self.__nodes.items():
             if node.complete and not node.error:
                 yield node_id, node.result
+
+
+class NodeCallbackImpl(_func.NodeCallback):
+
+    """
+    Callback impl is passed to node functions so they can call into the engine
+    It is only valid as long as the node function runs inside the call stack of a single message
+    """
+
+    def __init__(self, actor_ctx: _actors.ActorContext, node_id: NodeId):
+        self.__actor_ctx = actor_ctx
+        self.__node_id = node_id
+
+    def send_graph_updates(
+            self, new_nodes: tp.Dict[NodeId, _graph.Node],
+            new_deps: tp.Dict[NodeId, tp.List[_graph.Dependency]]):
+
+        self.__actor_ctx.send_parent("update_graph", self.__node_id, new_nodes, new_deps)