tracdap-runtime 0.6.2__py3-none-any.whl → 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. tracdap/rt/_exec/actors.py +87 -10
  2. tracdap/rt/_exec/context.py +207 -100
  3. tracdap/rt/_exec/dev_mode.py +52 -20
  4. tracdap/rt/_exec/engine.py +79 -14
  5. tracdap/rt/_exec/functions.py +14 -17
  6. tracdap/rt/_exec/runtime.py +83 -40
  7. tracdap/rt/_exec/server.py +306 -29
  8. tracdap/rt/_impl/config_parser.py +219 -49
  9. tracdap/rt/_impl/data.py +70 -5
  10. tracdap/rt/_impl/grpc/codec.py +60 -5
  11. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +19 -19
  12. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +11 -9
  13. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2_grpc.py +25 -25
  14. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +18 -18
  15. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +28 -16
  16. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +37 -6
  17. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +8 -3
  18. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.pyi +13 -2
  19. tracdap/rt/_impl/guard_rails.py +21 -0
  20. tracdap/rt/_impl/models.py +25 -0
  21. tracdap/rt/_impl/static_api.py +43 -13
  22. tracdap/rt/_impl/type_system.py +17 -0
  23. tracdap/rt/_impl/validation.py +47 -4
  24. tracdap/rt/_plugins/config_local.py +49 -0
  25. tracdap/rt/_version.py +1 -1
  26. tracdap/rt/api/hook.py +6 -5
  27. tracdap/rt/api/model_api.py +50 -7
  28. tracdap/rt/api/static_api.py +81 -23
  29. tracdap/rt/config/__init__.py +4 -4
  30. tracdap/rt/config/common.py +25 -15
  31. tracdap/rt/config/job.py +2 -2
  32. tracdap/rt/config/platform.py +25 -35
  33. tracdap/rt/config/result.py +2 -2
  34. tracdap/rt/config/runtime.py +4 -2
  35. tracdap/rt/ext/config.py +34 -0
  36. tracdap/rt/ext/embed.py +1 -3
  37. tracdap/rt/ext/plugins.py +47 -6
  38. tracdap/rt/launch/cli.py +11 -4
  39. tracdap/rt/launch/launch.py +53 -12
  40. tracdap/rt/metadata/__init__.py +17 -17
  41. tracdap/rt/metadata/common.py +2 -2
  42. tracdap/rt/metadata/custom.py +3 -3
  43. tracdap/rt/metadata/data.py +12 -12
  44. tracdap/rt/metadata/file.py +6 -6
  45. tracdap/rt/metadata/flow.py +6 -6
  46. tracdap/rt/metadata/job.py +8 -8
  47. tracdap/rt/metadata/model.py +21 -11
  48. tracdap/rt/metadata/object.py +3 -0
  49. tracdap/rt/metadata/object_id.py +8 -8
  50. tracdap/rt/metadata/search.py +5 -5
  51. tracdap/rt/metadata/stoarge.py +6 -6
  52. tracdap/rt/metadata/tag.py +1 -1
  53. tracdap/rt/metadata/tag_update.py +1 -1
  54. tracdap/rt/metadata/type.py +4 -4
  55. {tracdap_runtime-0.6.2.dist-info → tracdap_runtime-0.6.4.dist-info}/METADATA +4 -4
  56. tracdap_runtime-0.6.4.dist-info/RECORD +112 -0
  57. {tracdap_runtime-0.6.2.dist-info → tracdap_runtime-0.6.4.dist-info}/WHEEL +1 -1
  58. tracdap/rt/_impl/grpc/tracdap/config/common_pb2.py +0 -55
  59. tracdap/rt/_impl/grpc/tracdap/config/common_pb2.pyi +0 -103
  60. tracdap/rt/_impl/grpc/tracdap/config/job_pb2.py +0 -42
  61. tracdap/rt/_impl/grpc/tracdap/config/job_pb2.pyi +0 -44
  62. tracdap/rt/_impl/grpc/tracdap/config/platform_pb2.py +0 -71
  63. tracdap/rt/_impl/grpc/tracdap/config/platform_pb2.pyi +0 -197
  64. tracdap/rt/_impl/grpc/tracdap/config/result_pb2.py +0 -37
  65. tracdap/rt/_impl/grpc/tracdap/config/result_pb2.pyi +0 -35
  66. tracdap/rt/_impl/grpc/tracdap/config/runtime_pb2.py +0 -42
  67. tracdap/rt/_impl/grpc/tracdap/config/runtime_pb2.pyi +0 -46
  68. tracdap/rt/ext/_guard.py +0 -37
  69. tracdap_runtime-0.6.2.dist-info/RECORD +0 -121
  70. {tracdap_runtime-0.6.2.dist-info → tracdap_runtime-0.6.4.dist-info}/LICENSE +0 -0
  71. {tracdap_runtime-0.6.2.dist-info → tracdap_runtime-0.6.4.dist-info}/top_level.txt +0 -0
@@ -46,7 +46,7 @@ class DevModeTranslator:
46
46
  _log: tp.Optional[_util.logging.Logger] = None
47
47
 
48
48
  @classmethod
49
- def translate_sys_config(cls, sys_config: _cfg.RuntimeConfig, config_dir: tp.Optional[pathlib.Path]):
49
+ def translate_sys_config(cls, sys_config: _cfg.RuntimeConfig, config_mgr: _cfg_p.ConfigManager):
50
50
 
51
51
  cls._log.info(f"Applying dev mode config translation to system config")
52
52
 
@@ -56,7 +56,7 @@ class DevModeTranslator:
56
56
  sys_config.storage = _cfg.StorageConfig()
57
57
 
58
58
  sys_config = cls._add_integrated_repo(sys_config)
59
- sys_config = cls._resolve_relative_storage_root(sys_config, config_dir)
59
+ sys_config = cls._resolve_relative_storage_root(sys_config, config_mgr)
60
60
 
61
61
  return sys_config
62
62
 
@@ -66,7 +66,7 @@ class DevModeTranslator:
66
66
  sys_config: _cfg.RuntimeConfig,
67
67
  job_config: _cfg.JobConfig,
68
68
  scratch_dir: pathlib.Path,
69
- config_dir: tp.Optional[pathlib.Path],
69
+ config_mgr: _cfg_p.ConfigManager,
70
70
  model_class: tp.Optional[_api.TracModel.__class__]) \
71
71
  -> _cfg.JobConfig:
72
72
 
@@ -84,7 +84,7 @@ class DevModeTranslator:
84
84
 
85
85
  # Fow flows, load external flow definitions then perform auto-wiring and type inference
86
86
  if job_config.job.jobType == _meta.JobType.RUN_FLOW:
87
- job_config = cls._process_flow_definition(job_config, config_dir)
87
+ job_config = cls._process_flow_definition(job_config, config_mgr)
88
88
 
89
89
  # For run (model|flow) jobs, apply processing to the parameters, inputs and outputs
90
90
  if job_config.job.jobType in [_meta.JobType.RUN_MODEL, _meta.JobType.RUN_FLOW]:
@@ -109,7 +109,7 @@ class DevModeTranslator:
109
109
  @classmethod
110
110
  def _resolve_relative_storage_root(
111
111
  cls, sys_config: _cfg.RuntimeConfig,
112
- sys_config_path: tp.Optional[pathlib.Path]):
112
+ config_mgr: _cfg_p.ConfigManager):
113
113
 
114
114
  storage_config = copy.deepcopy(sys_config.storage)
115
115
 
@@ -128,6 +128,7 @@ class DevModeTranslator:
128
128
 
129
129
  cls._log.info(f"Resolving relative path for [{bucket_key}] local storage...")
130
130
 
131
+ sys_config_path = config_mgr.config_dir_path()
131
132
  if sys_config_path is not None:
132
133
  absolute_path = sys_config_path.joinpath(root_path).resolve()
133
134
  if absolute_path.exists():
@@ -291,7 +292,7 @@ class DevModeTranslator:
291
292
  return model_id, model_object
292
293
 
293
294
  @classmethod
294
- def _process_flow_definition(cls, job_config: _cfg.JobConfig, config_dir: pathlib.Path) -> _cfg.JobConfig:
295
+ def _process_flow_definition(cls, job_config: _cfg.JobConfig, config_mgr: _cfg_p.ConfigManager) -> _cfg.JobConfig:
295
296
 
296
297
  flow_details = job_config.job.runFlow.flow
297
298
 
@@ -305,21 +306,15 @@ class DevModeTranslator:
305
306
  cls._log.error(err)
306
307
  raise _ex.EConfigParse(err)
307
308
 
308
- flow_path = config_dir.joinpath(flow_details) if config_dir is not None else pathlib.Path(flow_details)
309
-
310
- if not flow_path.exists():
311
- err = f"Flow definition not available for [{flow_details}]: File not found ({flow_path})"
312
- cls._log.error(err)
313
- raise _ex.EConfigParse(err)
314
-
315
309
  flow_id = _util.new_object_id(_meta.ObjectType.FLOW)
316
310
  flow_key = _util.object_key(flow_id)
317
311
 
318
- cls._log.info(f"Generating flow definition for [{flow_details}] with ID = [{flow_key}]")
312
+ cls._log.info(f"Generating flow definition from [{flow_details}] with ID = [{flow_key}]")
313
+
314
+ flow_def = config_mgr.load_config_object(flow_details, _meta.FlowDefinition)
319
315
 
320
- flow_parser = _cfg_p.ConfigParser(_meta.FlowDefinition)
321
- flow_raw_data = flow_parser.load_raw_config(flow_path, flow_path.name)
322
- flow_def = flow_parser.parse(flow_raw_data, flow_path.name)
316
+ # Validate models against the flow (this could move to _impl.validation and check prod jobs as well)
317
+ cls._check_models_for_flow(flow_def, job_config)
323
318
 
324
319
  # Auto-wiring and inference only applied to externally loaded flows for now
325
320
  flow_def = cls._autowire_flow(flow_def, job_config)
@@ -339,6 +334,37 @@ class DevModeTranslator:
339
334
 
340
335
  return job_config
341
336
 
337
+ @classmethod
338
+ def _check_models_for_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
339
+
340
+ model_nodes = dict(filter(lambda n: n[1].nodeType == _meta.FlowNodeType.MODEL_NODE, flow.nodes.items()))
341
+
342
+ missing_models = list(filter(lambda m: m not in job_config.job.runFlow.models, model_nodes.keys()))
343
+ extra_models = list(filter(lambda m: m not in model_nodes, job_config.job.runFlow.models.keys()))
344
+
345
+ if any(missing_models):
346
+ error = f"Missing models in job definition: {', '.join(missing_models)}"
347
+ cls._log.error(error)
348
+ raise _ex.EJobValidation(error)
349
+
350
+ if any (extra_models):
351
+ error = f"Extra models in job definition: {', '.join(extra_models)}"
352
+ cls._log.error(error)
353
+ raise _ex.EJobValidation(error)
354
+
355
+ for model_name, model_node in model_nodes.items():
356
+
357
+ model_selector = job_config.job.runFlow.models[model_name]
358
+ model_obj = _util.get_job_resource(model_selector, job_config)
359
+
360
+ model_inputs = set(model_obj.model.inputs.keys())
361
+ model_outputs = set(model_obj.model.outputs.keys())
362
+
363
+ if model_inputs != set(model_node.inputs) or model_outputs != set(model_node.outputs):
364
+ error = f"The model supplied for [{model_name}] does not match the flow definition"
365
+ cls._log.error(error)
366
+ raise _ex.EJobValidation(error)
367
+
342
368
  @classmethod
343
369
  def _autowire_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
344
370
 
@@ -629,11 +655,13 @@ class DevModeTranslator:
629
655
  job_details = job_config.job.runModel
630
656
  model_obj = _util.get_job_resource(job_details.model, job_config)
631
657
  required_inputs = model_obj.model.inputs
658
+ required_outputs = model_obj.model.outputs
632
659
 
633
660
  elif job_config.job.jobType == _meta.JobType.RUN_FLOW:
634
661
  job_details = job_config.job.runFlow
635
662
  flow_obj = _util.get_job_resource(job_details.flow, job_config)
636
663
  required_inputs = flow_obj.flow.inputs
664
+ required_outputs = flow_obj.flow.outputs
637
665
 
638
666
  else:
639
667
  return job_config
@@ -645,7 +673,8 @@ class DevModeTranslator:
645
673
  for input_key, input_value in job_inputs.items():
646
674
  if not (isinstance(input_value, str) and input_value in job_resources):
647
675
 
648
- input_schema = required_inputs[input_key].schema
676
+ model_input = required_inputs[input_key]
677
+ input_schema = model_input.schema if model_input and not model_input.dynamic else None
649
678
 
650
679
  input_id = cls._process_input_or_output(
651
680
  sys_config, input_key, input_value, job_resources,
@@ -656,9 +685,12 @@ class DevModeTranslator:
656
685
  for output_key, output_value in job_outputs.items():
657
686
  if not (isinstance(output_value, str) and output_value in job_resources):
658
687
 
688
+ model_output= required_outputs[output_key]
689
+ output_schema = model_output.schema if model_output and not model_output.dynamic else None
690
+
659
691
  output_id = cls._process_input_or_output(
660
692
  sys_config, output_key, output_value, job_resources,
661
- new_unique_file=True, schema=None)
693
+ new_unique_file=True, schema=output_schema)
662
694
 
663
695
  job_outputs[output_key] = _util.selector_for(output_id)
664
696
 
@@ -776,7 +808,7 @@ class DevModeTranslator:
776
808
  if schema is not None:
777
809
  data_def.schema = schema
778
810
  else:
779
- data_def.schema = _meta.SchemaDefinition(schemaType=_meta.SchemaType.TABLE, table=_meta.TableSchema())
811
+ data_def.schema = None
780
812
 
781
813
  data_def.storageId = _meta.TagSelector(
782
814
  _meta.ObjectType.STORAGE, storage_id.objectId,
@@ -19,6 +19,7 @@ import dataclasses as dc
19
19
  import enum
20
20
  import typing as tp
21
21
 
22
+ import tracdap.rt.metadata as _meta
22
23
  import tracdap.rt.config as _cfg
23
24
  import tracdap.rt.exceptions as _ex
24
25
  import tracdap.rt._exec.actors as _actors
@@ -28,7 +29,6 @@ import tracdap.rt._impl.models as _models # noqa
28
29
  import tracdap.rt._impl.data as _data # noqa
29
30
  import tracdap.rt._impl.storage as _storage # noqa
30
31
  import tracdap.rt._impl.util as _util # noqa
31
- from .actors import Signal
32
32
 
33
33
  from .graph import NodeId
34
34
 
@@ -66,6 +66,18 @@ class _EngineContext:
66
66
  failed_nodes: tp.Set[NodeId] = dc.field(default_factory=set)
67
67
 
68
68
 
69
+ @dc.dataclass
70
+ class _JobState:
71
+
72
+ job_id: _meta.TagHeader
73
+ job_config: _cfg.JobConfig
74
+
75
+ actor_id: _actors.ActorId = None
76
+
77
+ job_result: _cfg.JobResult = None
78
+ job_error: Exception = None
79
+
80
+
69
81
  class TracEngine(_actors.Actor):
70
82
 
71
83
  """
@@ -88,7 +100,7 @@ class TracEngine(_actors.Actor):
88
100
  self._storage = storage
89
101
  self._notify_callback = notify_callback
90
102
 
91
- self._job_actors = dict()
103
+ self._jobs: tp.Dict[str, _JobState] = dict()
92
104
 
93
105
  def on_start(self):
94
106
 
@@ -98,7 +110,7 @@ class TracEngine(_actors.Actor):
98
110
 
99
111
  self._log.info("Engine shutdown complete")
100
112
 
101
- def on_signal(self, signal: Signal) -> tp.Optional[bool]:
113
+ def on_signal(self, signal: _actors.Signal) -> tp.Optional[bool]:
102
114
 
103
115
  # Failed signals can propagate from leaf nodes up the actor tree for a job
104
116
  # If the failure goes all the way up the tree without being handled, it will reach the engine node
@@ -110,8 +122,8 @@ class TracEngine(_actors.Actor):
110
122
  failed_job_key = None
111
123
 
112
124
  # Look for the job key corresponding to the failed actor
113
- for job_key, job_actor in self._job_actors.items():
114
- if job_actor == signal.sender:
125
+ for job_key, job_state in self._jobs.items():
126
+ if job_state.actor_id == signal.sender:
115
127
  failed_job_key = job_key
116
128
 
117
129
  # If the job is still live, call job_failed explicitly
@@ -147,19 +159,34 @@ class TracEngine(_actors.Actor):
147
159
  job_processor = JobProcessor(job_key, job_config, result_spec,self._models, self._storage)
148
160
  job_actor_id = self.actors().spawn(job_processor)
149
161
 
150
- job_actors = {**self._job_actors, job_key: job_actor_id}
151
- self._job_actors = job_actors
162
+ job_state = _JobState(job_config.jobId, job_config)
163
+ job_state.actor_id = job_actor_id
164
+
165
+ self._jobs[job_key] = job_state
166
+
167
+ @_actors.Message
168
+ def get_job_list(self):
169
+
170
+ job_list = list(map(self._get_job_info, self._jobs.keys()))
171
+ self.actors().reply("job_list", job_list)
172
+
173
+ @_actors.Message
174
+ def get_job_details(self, job_key: str, details: bool):
175
+
176
+ details = self._get_job_info(job_key, details)
177
+ self.actors().reply("job_details", details)
152
178
 
153
179
  @_actors.Message
154
180
  def job_succeeded(self, job_key: str, job_result: _cfg.JobResult):
155
181
 
156
182
  # Ignore duplicate messages from the job processor (can happen in unusual error cases)
157
- if job_key not in self._job_actors:
183
+ if job_key not in self._jobs:
158
184
  self._log.warning(f"Ignoring [job_succeeded] message, job [{job_key}] has already completed")
159
185
  return
160
186
 
161
187
  self._log.info(f"Recording job as successful: {job_key}")
162
188
 
189
+ self._jobs[job_key].job_result = job_result
163
190
  self._finalize_job(job_key)
164
191
 
165
192
  if self._notify_callback is not None:
@@ -169,12 +196,13 @@ class TracEngine(_actors.Actor):
169
196
  def job_failed(self, job_key: str, error: Exception):
170
197
 
171
198
  # Ignore duplicate messages from the job processor (can happen in unusual error cases)
172
- if job_key not in self._job_actors:
199
+ if job_key not in self._jobs:
173
200
  self._log.warning(f"Ignoring [job_failed] message, job [{job_key}] has already completed")
174
201
  return
175
202
 
176
203
  self._log.error(f"Recording job as failed: {job_key}")
177
204
 
205
+ self._jobs[job_key].job_error = error
178
206
  self._finalize_job(job_key)
179
207
 
180
208
  if self._notify_callback is not None:
@@ -182,10 +210,47 @@ class TracEngine(_actors.Actor):
182
210
 
183
211
  def _finalize_job(self, job_key: str):
184
212
 
185
- job_actors = self._job_actors
186
- job_actor_id = job_actors.pop(job_key)
187
- self.actors().stop(job_actor_id)
188
- self._job_actors = job_actors
213
+ # Stop the actor but keep the job state available for status / results queries
214
+
215
+ # In the future, job state will need to be expunged after some period of time
216
+ # For now each instance of the runtime only processes one job so no need to worry
217
+
218
+ job_state = self._jobs.get(job_key)
219
+ job_actor_id = job_state.actor_id if job_state is not None else None
220
+
221
+ if job_actor_id is not None:
222
+ self.actors().stop(job_actor_id)
223
+ job_state.actor_id = None
224
+
225
+ def _get_job_info(self, job_key: str, details: bool = False) -> tp.Optional[_cfg.JobResult]:
226
+
227
+ job_state = self._jobs.get(job_key)
228
+
229
+ if job_state is None:
230
+ return None
231
+
232
+ job_result = _cfg.JobResult()
233
+ job_result.jobId = job_state.job_id
234
+
235
+ if job_state.actor_id is not None:
236
+ job_result.statusCode = _meta.JobStatusCode.RUNNING
237
+
238
+ elif job_state.job_result is not None:
239
+ job_result.statusCode = job_state.job_result.statusCode
240
+ job_result.statusMessage = job_state.job_result.statusMessage
241
+ if details:
242
+ job_result.results = job_state.job_result.results or dict()
243
+
244
+ elif job_state.job_error is not None:
245
+ job_result.statusCode = _meta.JobStatusCode.FAILED
246
+ job_result.statusMessage = str(job_state.job_error.args[0])
247
+
248
+ else:
249
+ # Alternatively return UNKNOWN status or throw an error here
250
+ job_result.statusCode = _meta.JobStatusCode.FAILED
251
+ job_result.statusMessage = "No details available"
252
+
253
+ return job_result
189
254
 
190
255
 
191
256
  class JobProcessor(_actors.Actor):
@@ -218,7 +283,7 @@ class JobProcessor(_actors.Actor):
218
283
  self._log.info(f"Cleaning up job [{self.job_key}]")
219
284
  self._models.destroy_scope(self.job_key)
220
285
 
221
- def on_signal(self, signal: Signal) -> tp.Optional[bool]:
286
+ def on_signal(self, signal: _actors.Signal) -> tp.Optional[bool]:
222
287
 
223
288
  if signal.message == _actors.SignalNames.FAILED and isinstance(signal, _actors.ErrorSignal):
224
289
 
@@ -252,7 +252,13 @@ class DataViewFunc(NodeFunction[_data.DataView]):
252
252
  if root_item.is_empty():
253
253
  return _data.DataView.create_empty()
254
254
 
255
- data_view = _data.DataView.for_trac_schema(self.node.schema)
255
+ if self.node.schema is not None and len(self.node.schema.table.fields) > 0:
256
+ trac_schema = self.node.schema
257
+ else:
258
+ arrow_schema = root_item.schema
259
+ trac_schema = _data.DataMapping.arrow_to_trac_schema(arrow_schema)
260
+
261
+ data_view = _data.DataView.for_trac_schema(trac_schema)
256
262
  data_view = _data.DataMapping.add_item_to_view(data_view, root_part_key, root_item)
257
263
 
258
264
  return data_view
@@ -544,7 +550,6 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
544
550
  # Still, if any nodes are missing or have the wrong type TracContextImpl will raise ERuntimeValidation
545
551
 
546
552
  local_ctx = {}
547
- static_schemas = {}
548
553
 
549
554
  for node_id, node_result in _ctx_iter_items(ctx):
550
555
 
@@ -558,22 +563,10 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
558
563
  if node_id.name in model_def.inputs:
559
564
  input_name = node_id.name
560
565
  local_ctx[input_name] = node_result
561
- # At the moment, all model inputs have static schemas
562
- static_schemas[input_name] = model_def.inputs[input_name].schema
563
-
564
- # Add empty data views to the local context to hold model outputs
565
- # Assuming outputs are all defined with static schemas
566
-
567
- for output_name in model_def.outputs:
568
- output_schema = self.node.model_def.outputs[output_name].schema
569
- empty_data_view = _data.DataView.for_trac_schema(output_schema)
570
- local_ctx[output_name] = empty_data_view
571
- # At the moment, all model outputs have static schemas
572
- static_schemas[output_name] = output_schema
573
566
 
574
567
  # Run the model against the mapped local context
575
568
 
576
- trac_ctx = _ctx.TracContextImpl(self.node.model_def, self.model_class, local_ctx, static_schemas)
569
+ trac_ctx = _ctx.TracContextImpl(self.node.model_def, self.model_class, local_ctx, self.checkout_directory)
577
570
 
578
571
  try:
579
572
  model = self.model_class()
@@ -594,12 +587,16 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
594
587
  result: _data.DataView = local_ctx.get(output_name)
595
588
 
596
589
  if result is None or result.is_empty():
590
+
597
591
  if not output_schema.optional:
598
592
  model_name = self.model_class.__name__
599
593
  raise _ex.ERuntimeValidation(f"Missing required output [{output_name}] from model [{model_name}]")
600
594
 
601
- if result is not None:
602
- results[output_name] = result
595
+ # Create a placeholder for optional outputs that were not emitted
596
+ elif result is None:
597
+ result = _data.DataView.create_empty()
598
+
599
+ results[output_name] = result
603
600
 
604
601
  return results
605
602
 
@@ -16,6 +16,7 @@ from __future__ import annotations
16
16
 
17
17
  import dataclasses as dc
18
18
  import datetime as dt
19
+ import signal
19
20
  import threading
20
21
 
21
22
  import sys
@@ -54,6 +55,8 @@ class TracRuntime:
54
55
  _engine.ModelNodeProcessor: "model",
55
56
  _engine.DataNodeProcessor: "data"}
56
57
 
58
+ __DEFAULT_API_PORT = 9000
59
+
57
60
  def __init__(
58
61
  self,
59
62
  sys_config: tp.Union[str, pathlib.Path, _cfg.RuntimeConfig],
@@ -61,6 +64,7 @@ class TracRuntime:
61
64
  job_result_format: tp.Optional[str] = None,
62
65
  scratch_dir: tp.Union[str, pathlib.Path, None] = None,
63
66
  scratch_dir_persist: bool = False,
67
+ plugin_packages: tp.List[str] = None,
64
68
  dev_mode: bool = False):
65
69
 
66
70
  trac_version = _version.__version__
@@ -83,28 +87,34 @@ class TracRuntime:
83
87
  self._log.info(f"TRAC D.A.P. Python Runtime {trac_version}")
84
88
 
85
89
  self._sys_config = sys_config if isinstance(sys_config, _cfg.RuntimeConfig) else None
86
- self._sys_config_path = pathlib.Path(sys_config) if not self._sys_config else None
90
+ self._sys_config_path = sys_config if not self._sys_config else None
87
91
  self._job_result_dir = job_result_dir
88
92
  self._job_result_format = job_result_format
89
93
  self._scratch_dir = scratch_dir
90
94
  self._scratch_dir_provided = True if scratch_dir is not None else False
91
95
  self._scratch_dir_persist = scratch_dir_persist
96
+ self._plugin_packages = plugin_packages or []
92
97
  self._dev_mode = dev_mode
93
- self._server_enabled = False
94
- self._server_port = 0
95
98
 
99
+ # Runtime control
100
+ self._runtime_lock = threading.Lock()
101
+ self._runtime_event = threading.Condition(self._runtime_lock)
96
102
  self._pre_start_complete = False
103
+ self._shutdown_requested = False
104
+ self._oneshot_job = None
97
105
 
98
106
  # Top level resources
107
+ self._config_mgr: tp.Optional[_cparse.ConfigManager] = None
99
108
  self._models: tp.Optional[_models.ModelLoader] = None
100
109
  self._storage: tp.Optional[_storage.StorageManager] = None
101
110
 
102
111
  # The execution engine
103
112
  self._system: tp.Optional[_actors.ActorSystem] = None
104
113
  self._engine: tp.Optional[_engine.TracEngine] = None
105
- self._engine_event = threading.Condition()
106
114
 
107
115
  # Runtime API server
116
+ self._server_enabled = False
117
+ self._server_port = 0
108
118
  self._server = None
109
119
 
110
120
  self._jobs: tp.Dict[str, _RuntimeJobInfo] = dict()
@@ -134,21 +144,28 @@ class TracRuntime:
134
144
 
135
145
  self._prepare_scratch_dir()
136
146
 
137
- # Plugin manager and static API impl are singletons
138
- # If these methods are called multiple times, the second and subsequent calls are ignored
147
+ # Plugin manager, static API and guard rails are singletons
148
+ # Calling these methods multiple times is safe (e.g. for embedded or testing scenarios)
149
+ # However, plugins are never un-registered for the lifetime of the processes
139
150
 
140
151
  _plugins.PluginManager.register_core_plugins()
152
+
153
+ for plugin_package in self._plugin_packages:
154
+ _plugins.PluginManager.register_plugin_package(plugin_package)
155
+
141
156
  _static_api.StaticApiImpl.register_impl()
142
157
  _guard.PythonGuardRails.protect_dangerous_functions()
143
158
 
144
159
  # Load sys config (or use embedded), config errors are detected before start()
145
160
  # Job config can also be checked before start() by using load_job_config()
146
161
 
162
+ self._config_mgr = _cparse.ConfigManager.for_root_config(self._sys_config_path)
163
+
147
164
  if self._sys_config is None:
148
165
  sys_config_dev_mode = _dev_mode.DEV_MODE_SYS_CONFIG if self._dev_mode else None
149
- sys_config_parser = _cparse.ConfigParser(_cfg.RuntimeConfig, sys_config_dev_mode)
150
- sys_config_raw = sys_config_parser.load_raw_config(self._sys_config_path, config_file_name="system")
151
- self._sys_config = sys_config_parser.parse(sys_config_raw, self._sys_config_path)
166
+ self._sys_config = self._config_mgr.load_root_object(
167
+ _cfg.RuntimeConfig, sys_config_dev_mode,
168
+ config_file_name="system")
152
169
  else:
153
170
  self._log.info("Using embedded system config")
154
171
 
@@ -156,8 +173,15 @@ class TracRuntime:
156
173
  # I.e. it can be applied to embedded configs
157
174
 
158
175
  if self._dev_mode:
159
- config_dir = self._sys_config_path.parent if self._sys_config_path is not None else None
160
- self._sys_config = _dev_mode.DevModeTranslator.translate_sys_config(self._sys_config, config_dir)
176
+ self._sys_config = _dev_mode.DevModeTranslator.translate_sys_config(self._sys_config, self._config_mgr)
177
+
178
+ # Runtime API server is controlled by the sys config
179
+
180
+ if self._sys_config.runtimeApi is not None:
181
+ api_config = self._sys_config.runtimeApi
182
+ if api_config.enabled:
183
+ self._server_enabled = True
184
+ self._server_port = api_config.port or self.__DEFAULT_API_PORT
161
185
 
162
186
  self._pre_start_complete = True
163
187
 
@@ -196,7 +220,7 @@ class TracRuntime:
196
220
  # The server module pulls in all the gRPC dependencies, don't import it unless we have to
197
221
  import tracdap.rt._exec.server as _server
198
222
 
199
- self._server = _server.RuntimeApiServer(self._server_port)
223
+ self._server = _server.RuntimeApiServer(self._system, self._server_port)
200
224
  self._server.start()
201
225
 
202
226
  except Exception as e:
@@ -237,6 +261,28 @@ class TracRuntime:
237
261
  else:
238
262
  self._log.info("TRAC runtime has gone down cleanly")
239
263
 
264
+ def is_oneshot(self):
265
+ return not self._server_enabled
266
+
267
+ def run_until_done(self):
268
+
269
+ if self._server_enabled == False and len(self._jobs) == 0:
270
+ self._log.error("No job config supplied, TRAC runtime will not run")
271
+ raise _ex.EStartup("No job config supplied")
272
+
273
+ signal.signal(signal.SIGTERM, self._request_shutdown)
274
+ signal.signal(signal.SIGINT, self._request_shutdown)
275
+
276
+ with self._runtime_lock:
277
+ while not self._shutdown_requested:
278
+ self._runtime_event.wait()
279
+
280
+ def _request_shutdown(self, _signum = None, _frame = None):
281
+
282
+ with self._runtime_lock:
283
+ self._shutdown_requested = True
284
+ self._runtime_event.notify()
285
+
240
286
  def _prepare_scratch_dir(self):
241
287
 
242
288
  if not self._scratch_dir_provided:
@@ -274,20 +320,18 @@ class TracRuntime:
274
320
 
275
321
  if isinstance(job_config, _cfg.JobConfig):
276
322
  self._log.info("Using embedded job config")
277
- job_config_path = None
278
323
 
279
324
  else:
280
- job_config_path = job_config
281
325
  job_config_dev_mode = _dev_mode.DEV_MODE_JOB_CONFIG if self._dev_mode else None
282
- job_config_parser = _cparse.ConfigParser(_cfg.JobConfig, job_config_dev_mode)
283
- job_config_raw = job_config_parser.load_raw_config(job_config_path, config_file_name="job")
284
- job_config = job_config_parser.parse(job_config_raw, job_config_path)
326
+ job_config = self._config_mgr.load_config_object(
327
+ job_config, _cfg.JobConfig,
328
+ job_config_dev_mode,
329
+ config_file_name="job")
285
330
 
286
331
  if self._dev_mode:
287
- config_dir = job_config_path.parent if job_config_path is not None else None
288
332
  job_config = _dev_mode.DevModeTranslator.translate_job_config(
289
333
  self._sys_config, job_config,
290
- self._scratch_dir, config_dir,
334
+ self._scratch_dir, self._config_mgr,
291
335
  model_class)
292
336
 
293
337
  return job_config
@@ -297,7 +341,7 @@ class TracRuntime:
297
341
  job_key = _util.object_key(job_config.jobId)
298
342
  self._jobs[job_key] = _RuntimeJobInfo()
299
343
 
300
- self._system.send(
344
+ self._system.send_main(
301
345
  "submit_job", job_config,
302
346
  str(self._job_result_dir) if self._job_result_dir else "",
303
347
  self._job_result_format if self._job_result_format else "")
@@ -309,35 +353,34 @@ class TracRuntime:
309
353
  if job_key not in self._jobs:
310
354
  raise _ex.ETracInternal(f"Attempt to wait for a job that was never started")
311
355
 
312
- with self._engine_event:
313
- while True:
356
+ self._oneshot_job = job_key
314
357
 
315
- job_info = self._jobs[job_key]
358
+ self.run_until_done()
316
359
 
317
- if job_info.error is not None:
318
- raise job_info.error
360
+ job_info = self._jobs[job_key]
319
361
 
320
- if job_info.result is not None:
321
- return job_info.result
362
+ if job_info.error is not None:
363
+ raise job_info.error
322
364
 
323
- # TODO: Timeout / heartbeat
365
+ elif job_info.result is not None:
366
+ return job_info.result
324
367
 
325
- self._engine_event.wait(1)
368
+ else:
369
+ err = f"No result or error information is available for job [{job_key}]"
370
+ self._log.error(err)
371
+ raise _ex.ETracInternal(err)
326
372
 
327
373
  def _engine_callback(self, job_key, job_result, job_error):
328
374
 
329
- with self._engine_event:
330
-
331
- if job_result is not None:
332
- self._jobs[job_key].done = True
333
- self._jobs[job_key].result = job_result
334
- elif job_error is not None:
335
- self._jobs[job_key].done = True
336
- self._jobs[job_key].error = job_error
337
- else:
338
- pass
375
+ if job_result is not None:
376
+ self._jobs[job_key].done = True
377
+ self._jobs[job_key].result = job_result
378
+ elif job_error is not None:
379
+ self._jobs[job_key].done = True
380
+ self._jobs[job_key].error = job_error
339
381
 
340
- self._engine_event.notify()
382
+ if self._oneshot_job == job_key:
383
+ self._request_shutdown()
341
384
 
342
385
  # ------------------------------------------------------------------------------------------------------------------
343
386
  # Error handling