tracdap-runtime 0.6.2__py3-none-any.whl → 0.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. tracdap/rt/_exec/actors.py +87 -10
  2. tracdap/rt/_exec/dev_mode.py +9 -17
  3. tracdap/rt/_exec/engine.py +79 -14
  4. tracdap/rt/_exec/runtime.py +83 -40
  5. tracdap/rt/_exec/server.py +306 -29
  6. tracdap/rt/_impl/config_parser.py +219 -49
  7. tracdap/rt/_impl/grpc/codec.py +60 -5
  8. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +19 -19
  9. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +11 -9
  10. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2_grpc.py +25 -25
  11. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +28 -16
  12. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +33 -6
  13. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +8 -3
  14. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.pyi +13 -2
  15. tracdap/rt/_impl/guard_rails.py +21 -0
  16. tracdap/rt/_impl/models.py +25 -0
  17. tracdap/rt/_impl/static_api.py +23 -9
  18. tracdap/rt/_impl/type_system.py +17 -0
  19. tracdap/rt/_impl/validation.py +10 -0
  20. tracdap/rt/_plugins/config_local.py +49 -0
  21. tracdap/rt/_version.py +1 -1
  22. tracdap/rt/api/hook.py +6 -3
  23. tracdap/rt/api/static_api.py +71 -21
  24. tracdap/rt/config/__init__.py +4 -4
  25. tracdap/rt/config/common.py +10 -0
  26. tracdap/rt/config/platform.py +0 -10
  27. tracdap/rt/config/runtime.py +2 -0
  28. tracdap/rt/ext/config.py +34 -0
  29. tracdap/rt/ext/embed.py +1 -3
  30. tracdap/rt/ext/plugins.py +47 -6
  31. tracdap/rt/launch/cli.py +4 -0
  32. tracdap/rt/launch/launch.py +34 -9
  33. tracdap/rt/metadata/__init__.py +17 -17
  34. tracdap/rt/metadata/model.py +6 -0
  35. tracdap/rt/metadata/object.py +3 -0
  36. {tracdap_runtime-0.6.2.dist-info → tracdap_runtime-0.6.3.dist-info}/METADATA +4 -4
  37. {tracdap_runtime-0.6.2.dist-info → tracdap_runtime-0.6.3.dist-info}/RECORD +40 -49
  38. {tracdap_runtime-0.6.2.dist-info → tracdap_runtime-0.6.3.dist-info}/WHEEL +1 -1
  39. tracdap/rt/_impl/grpc/tracdap/config/common_pb2.py +0 -55
  40. tracdap/rt/_impl/grpc/tracdap/config/common_pb2.pyi +0 -103
  41. tracdap/rt/_impl/grpc/tracdap/config/job_pb2.py +0 -42
  42. tracdap/rt/_impl/grpc/tracdap/config/job_pb2.pyi +0 -44
  43. tracdap/rt/_impl/grpc/tracdap/config/platform_pb2.py +0 -71
  44. tracdap/rt/_impl/grpc/tracdap/config/platform_pb2.pyi +0 -197
  45. tracdap/rt/_impl/grpc/tracdap/config/result_pb2.py +0 -37
  46. tracdap/rt/_impl/grpc/tracdap/config/result_pb2.pyi +0 -35
  47. tracdap/rt/_impl/grpc/tracdap/config/runtime_pb2.py +0 -42
  48. tracdap/rt/_impl/grpc/tracdap/config/runtime_pb2.pyi +0 -46
  49. tracdap/rt/ext/_guard.py +0 -37
  50. {tracdap_runtime-0.6.2.dist-info → tracdap_runtime-0.6.3.dist-info}/LICENSE +0 -0
  51. {tracdap_runtime-0.6.2.dist-info → tracdap_runtime-0.6.3.dist-info}/top_level.txt +0 -0

tracdap/rt/_exec/actors.py

@@ -25,6 +25,7 @@ import queue
 import time
 
 import tracdap.rt._impl.util as util # noqa
+import tracdap.rt._impl.validation as _val # noqa
 import tracdap.rt.exceptions as _ex
 
 
@@ -180,6 +181,49 @@ class ActorContext:
         return self.__error or self.__node.error
 
 
+class ThreadsafeActor(Actor):
+
+    def __init__(self):
+        super().__init__()
+        self.__threadsafe: tp.Optional[ThreadsafeContext] = None
+
+    def threadsafe(self) -> ThreadsafeContext:
+        return self.__threadsafe
+
+
+class ThreadsafeContext:
+
+    def __init__(self, node: ActorNode):
+        self.__node = node
+        self.__id = node.actor_id
+        self.__parent = node.parent.actor_id if node.parent is not None else None
+
+    def spawn(self, actor: Actor):
+        self.__node.event_loop.post_message(
+            None, lambda _:
+            self.__node.spawn(actor) and None)
+
+    def send(self, target_id: ActorId, message: str, *args, **kwargs):
+        self.__node.event_loop.post_message(
+            None, lambda _:
+            self.__node.send_message(self.__id, target_id, message, args, kwargs))
+
+    def send_parent(self, message: str, *args, **kwargs):
+        self.__node.event_loop.post_message(
+            None, lambda _:
+            self.__node.send_message(self.__id, self.__parent, message, args, kwargs))
+
+    def stop(self):
+        self.__node.event_loop.post_message(
+            None, lambda _:
+            self.__node.send_signal(self.__id, self.__id, SignalNames.STOP))
+
+    def fail(self, error: Exception):
+        self.__node.event_loop.post_message(
+            None, lambda _:
+            self.__node.send_signal(self.__id, self.__id, SignalNames.STOP, error))
+
+
 class EventLoop:
 
     _T_MSG = tp.TypeVar("_T_MSG")
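
The new ThreadsafeContext mirrors the regular actor context, but every operation is posted back onto the owning actor's event loop, so it can be handed safely to code running on a foreign thread. Below is a minimal sketch of how a ThreadsafeActor subclass might use it; the class, method and message names are illustrative only, not part of the package.

    import threading
    import tracdap.rt._exec.actors as actors

    class BackgroundWorker(actors.ThreadsafeActor):

        def on_start(self):
            # Hand the threadsafe context to a worker thread; calls on the context are
            # queued onto this actor's event loop, not executed on the worker thread
            thread = threading.Thread(target=self._work, args=(self.threadsafe(),))
            thread.daemon = True
            thread.start()

        def _work(self, ctx: actors.ThreadsafeContext):
            try:
                ...  # long-running or blocking work happens off the event loop
                ctx.send_parent("work_complete")
                ctx.stop()
            except Exception as e:
                ctx.fail(e)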
@@ -340,7 +384,7 @@ class ActorNode:
         self.state: ActorState = ActorState.NOT_STARTED
         self.error: tp.Optional[Exception] = None
 
-    def spawn(self, child_actor: Actor):
+    def spawn(self, child_actor: Actor) -> ActorId:
 
         if self._log.isEnabledFor(logging.DEBUG):
             self._log.debug(f"spawn [{self.actor_id}]: [{type(child_actor)}]")
@@ -355,6 +399,11 @@ class ActorNode:
         child_node = ActorNode(child_id, child_actor, self, self.system, event_loop)
         self.children[child_id] = child_node
 
+        # If this is a threadsafe actor, set up the threadsafe context
+        if isinstance(child_actor, ThreadsafeActor):
+            threadsafe = ThreadsafeContext(child_node)
+            child_actor._ThreadsafeActor__threadsafe = threadsafe
+
         child_node.send_signal(self.actor_id, child_id, SignalNames.START)
 
         return child_id
@@ -542,6 +591,12 @@ class ActorNode:
         if not self._check_message_target(signal):
             return
 
+        # Do not process signals after the actor has stopped
+        # This is common with e.g. STOP signals that propagate up and down the tree
+
+        if self.state in [ActorState.STOPPED, ActorState.FAILED]:
+            return
+
         # Call the signal receiver function
         # This gives the actor a chance to respond to the signal
 
@@ -768,10 +823,12 @@ class ActorNode:
         # Positional arg types
         for pos_param, pos_arg in zip(pos_params, args):
 
+            # If no type hint is available, allow anything through
+            # Otherwise, reuse the validator logic to type check individual args
             type_hint = type_hints.get(pos_param.name)
+            type_check = type_hint is None or _val.check_type(type_hint, pos_arg)
 
-            # If no type hint is available, allow anything through
-            if type_hint is not None and not isinstance(pos_arg, type_hint):
+            if not type_check:
                 error = f"Invalid message: [{message}] -> {target_id} (wrong parameter type for '{pos_param.name}')"
                 self._log.error(error)
                 raise EBadActor(error)
@@ -780,20 +837,20 @@ class ActorNode:
         for kw_param in kw_params:
 
             kw_arg = kwargs.get(kw_param.name)
-            type_hint = type_hints.get(kw_param.name)
 
             # If param has taken a default value, no type check is needed
             if kw_arg is None:
                 continue
 
-            # If no type hint is available, allow anything through
-            if type_hint is not None and not isinstance(kw_arg, type_hint):
+            # Otherwise use the same type-validation logic as positional args
+            type_hint = type_hints.get(kw_param.name)
+            type_check = type_hint is None or _val.check_type(type_hint, kw_arg)
+
+            if not type_check:
                 error = f"Invalid message: [{message}] -> {target_id} (wrong parameter type for '{kw_param.name}')"
                 self._log.error(error)
                 raise EBadActor(error)
 
-        # TODO: Verify generics for both args and kwargs
-
 
 class RootActor(Actor):
 
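
The parameter checks above now delegate to the validation module instead of calling isinstance() directly. That matters because isinstance() rejects subscripted generics, so message signatures using typing hints such as List[str] or Optional[...] could not be checked before. A sketch of a message that benefits from the change (the actor and message names are made up):

    import typing as tp
    import tracdap.rt._exec.actors as actors

    class Reporter(actors.Actor):

        @actors.Message
        def report(self, lines: tp.List[str], attachment: tp.Optional[bytes] = None):
            # Under the old isinstance() check these hints would raise a TypeError;
            # _val.check_type understands generic and Optional hints
            ...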
@@ -864,11 +921,17 @@ class ActorSystem:
 
         self.__root_started = threading.Event()
         self.__root_stopped = threading.Event()
+
         self.__root_actor = RootActor(main_actor, self.__root_started, self.__root_stopped)
         self.__root_node = ActorNode(self.ROOT_ID, self.__root_actor, None, self, self.__system_event_loop)
 
     # Public API
 
+    def main_id(self) -> ActorId:
+        if not self.__root_started.is_set():
+            raise EBadActor("System has not started yet")
+        return self.__root_actor.main_id
+
     def start(self, wait=True):
 
         self.__system_thread.start()
@@ -913,12 +976,26 @@ class ActorSystem:
 
         return self.__root_node.error
 
-    def send(self, message: str, *args, **kwargs):
+    def spawn_agent(self, agent: Actor) -> ActorId:
+
+        if not self.__root_started.is_set():
+            raise EBadActor("System has not started yet")
+
+        return self.__root_node.spawn(agent)
+
+    def send_main(self, message: str, *args, **kwargs):
 
         if self.__root_actor.main_id is None:
             raise EBadActor("System has not started yet")
 
-        self.__root_node.send_message("/external", self.__root_actor.main_id, message, args, kwargs)
+        self.__root_node.send_message("/external", self.__root_actor.main_id, message, args, kwargs)  # TODO
+
+    def send(self, actor_id: ActorId, message: str, *args, **kwargs):
+
+        if not self.__root_started.is_set():
+            raise EBadActor("System has not started yet")
+
+        self.__root_node.send_message("/external", actor_id, message, args, kwargs)
 
     def _setup_event_loops(self, thread_pools: tp.Dict[str, int]):
 
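
Taken together, these changes give ActorSystem an external control surface: main_id() exposes the main actor's id, spawn_agent() creates top-level agents under the root node, send_main() replaces the old single-target send(), and send() now delivers to any actor id. Below is an illustrative sketch of how non-actor code (for example the runtime API server) might drive it; the MainActor class is made up and the ActorSystem constructor arguments are assumed, not shown in this diff.

    import tracdap.rt._exec.actors as actors

    class MainActor(actors.Actor):

        @actors.Message
        def submit_job(self, job_config):
            ...  # handle the job submission

    system = actors.ActorSystem(MainActor())  # constructor details assumed
    system.start(wait=True)

    system.send_main("submit_job", {"some": "config"})       # message to the main actor
    agent_id = system.spawn_agent(MainActor())                # extra agent under the root node
    system.send(agent_id, "submit_job", {"some": "config"})   # message to any known actor id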

tracdap/rt/_exec/dev_mode.py

@@ -46,7 +46,7 @@ class DevModeTranslator:
     _log: tp.Optional[_util.logging.Logger] = None
 
     @classmethod
-    def translate_sys_config(cls, sys_config: _cfg.RuntimeConfig, config_dir: tp.Optional[pathlib.Path]):
+    def translate_sys_config(cls, sys_config: _cfg.RuntimeConfig, config_mgr: _cfg_p.ConfigManager):
 
         cls._log.info(f"Applying dev mode config translation to system config")
 
@@ -56,7 +56,7 @@ class DevModeTranslator:
             sys_config.storage = _cfg.StorageConfig()
 
         sys_config = cls._add_integrated_repo(sys_config)
-        sys_config = cls._resolve_relative_storage_root(sys_config, config_dir)
+        sys_config = cls._resolve_relative_storage_root(sys_config, config_mgr)
 
         return sys_config
 
@@ -66,7 +66,7 @@ class DevModeTranslator:
             sys_config: _cfg.RuntimeConfig,
             job_config: _cfg.JobConfig,
             scratch_dir: pathlib.Path,
-            config_dir: tp.Optional[pathlib.Path],
+            config_mgr: _cfg_p.ConfigManager,
             model_class: tp.Optional[_api.TracModel.__class__]) \
             -> _cfg.JobConfig:
 
@@ -84,7 +84,7 @@ class DevModeTranslator:
 
         # Fow flows, load external flow definitions then perform auto-wiring and type inference
         if job_config.job.jobType == _meta.JobType.RUN_FLOW:
-            job_config = cls._process_flow_definition(job_config, config_dir)
+            job_config = cls._process_flow_definition(job_config, config_mgr)
 
         # For run (model|flow) jobs, apply processing to the parameters, inputs and outputs
         if job_config.job.jobType in [_meta.JobType.RUN_MODEL, _meta.JobType.RUN_FLOW]:
@@ -109,7 +109,7 @@ class DevModeTranslator:
     @classmethod
     def _resolve_relative_storage_root(
             cls, sys_config: _cfg.RuntimeConfig,
-            sys_config_path: tp.Optional[pathlib.Path]):
+            config_mgr: _cfg_p.ConfigManager):
 
         storage_config = copy.deepcopy(sys_config.storage)
 
@@ -128,6 +128,7 @@ class DevModeTranslator:
 
             cls._log.info(f"Resolving relative path for [{bucket_key}] local storage...")
 
+            sys_config_path = config_mgr.config_dir_path()
             if sys_config_path is not None:
                 absolute_path = sys_config_path.joinpath(root_path).resolve()
                 if absolute_path.exists():
@@ -291,7 +292,7 @@ class DevModeTranslator:
         return model_id, model_object
 
     @classmethod
-    def _process_flow_definition(cls, job_config: _cfg.JobConfig, config_dir: pathlib.Path) -> _cfg.JobConfig:
+    def _process_flow_definition(cls, job_config: _cfg.JobConfig, config_mgr: _cfg_p.ConfigManager) -> _cfg.JobConfig:
 
         flow_details = job_config.job.runFlow.flow
 
@@ -305,21 +306,12 @@ class DevModeTranslator:
             cls._log.error(err)
             raise _ex.EConfigParse(err)
 
-        flow_path = config_dir.joinpath(flow_details) if config_dir is not None else pathlib.Path(flow_details)
-
-        if not flow_path.exists():
-            err = f"Flow definition not available for [{flow_details}]: File not found ({flow_path})"
-            cls._log.error(err)
-            raise _ex.EConfigParse(err)
-
         flow_id = _util.new_object_id(_meta.ObjectType.FLOW)
         flow_key = _util.object_key(flow_id)
 
-        cls._log.info(f"Generating flow definition for [{flow_details}] with ID = [{flow_key}]")
+        cls._log.info(f"Generating flow definition from [{flow_details}] with ID = [{flow_key}]")
 
-        flow_parser = _cfg_p.ConfigParser(_meta.FlowDefinition)
-        flow_raw_data = flow_parser.load_raw_config(flow_path, flow_path.name)
-        flow_def = flow_parser.parse(flow_raw_data, flow_path.name)
+        flow_def = config_mgr.load_config_object(flow_details, _meta.FlowDefinition)
 
         # Auto-wiring and inference only applied to externally loaded flows for now
         flow_def = cls._autowire_flow(flow_def, job_config)
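
Across dev_mode.py, the translator now takes a ConfigManager instead of a raw config directory, so storage roots and flow definitions are resolved by the same loader as every other config file (note the new config_mgr.config_dir_path() and config_mgr.load_config_object() calls above). A rough sketch of the loading pattern, with placeholder paths; the None argument to load_root_object stands in for the dev-mode location map seen in runtime.py:

    import tracdap.rt.config as cfg
    import tracdap.rt.metadata as meta
    import tracdap.rt._impl.config_parser as cfg_parser

    # Build a config manager rooted at the sys config file
    config_mgr = cfg_parser.ConfigManager.for_root_config("config/sys_config.yaml")

    # Root and secondary config objects all load through the same manager
    sys_config = config_mgr.load_root_object(cfg.RuntimeConfig, None, config_file_name="system")
    flow_def = config_mgr.load_config_object("flows/my_flow.yaml", meta.FlowDefinition)

    # Dev mode can still ask where the root config lives, e.g. to resolve relative storage paths
    config_dir = config_mgr.config_dir_path()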

tracdap/rt/_exec/engine.py

@@ -19,6 +19,7 @@ import dataclasses as dc
 import enum
 import typing as tp
 
+import tracdap.rt.metadata as _meta
 import tracdap.rt.config as _cfg
 import tracdap.rt.exceptions as _ex
 import tracdap.rt._exec.actors as _actors
@@ -28,7 +29,6 @@ import tracdap.rt._impl.models as _models # noqa
 import tracdap.rt._impl.data as _data # noqa
 import tracdap.rt._impl.storage as _storage # noqa
 import tracdap.rt._impl.util as _util # noqa
-from .actors import Signal
 
 from .graph import NodeId
 
@@ -66,6 +66,18 @@ class _EngineContext:
     failed_nodes: tp.Set[NodeId] = dc.field(default_factory=set)
 
 
+@dc.dataclass
+class _JobState:
+
+    job_id: _meta.TagHeader
+    job_config: _cfg.JobConfig
+
+    actor_id: _actors.ActorId = None
+
+    job_result: _cfg.JobResult = None
+    job_error: Exception = None
+
+
 class TracEngine(_actors.Actor):
 
     """
@@ -88,7 +100,7 @@ class TracEngine(_actors.Actor):
         self._storage = storage
         self._notify_callback = notify_callback
 
-        self._job_actors = dict()
+        self._jobs: tp.Dict[str, _JobState] = dict()
 
     def on_start(self):
 
@@ -98,7 +110,7 @@ class TracEngine(_actors.Actor):
 
         self._log.info("Engine shutdown complete")
 
-    def on_signal(self, signal: Signal) -> tp.Optional[bool]:
+    def on_signal(self, signal: _actors.Signal) -> tp.Optional[bool]:
 
         # Failed signals can propagate from leaf nodes up the actor tree for a job
         # If the failure goes all the way up the tree without being handled, it will reach the engine node
@@ -110,8 +122,8 @@ class TracEngine(_actors.Actor):
             failed_job_key = None
 
             # Look for the job key corresponding to the failed actor
-            for job_key, job_actor in self._job_actors.items():
-                if job_actor == signal.sender:
+            for job_key, job_state in self._jobs.items():
+                if job_state.actor_id == signal.sender:
                     failed_job_key = job_key
 
             # If the job is still live, call job_failed explicitly
@@ -147,19 +159,34 @@ class TracEngine(_actors.Actor):
         job_processor = JobProcessor(job_key, job_config, result_spec,self._models, self._storage)
         job_actor_id = self.actors().spawn(job_processor)
 
-        job_actors = {**self._job_actors, job_key: job_actor_id}
-        self._job_actors = job_actors
+        job_state = _JobState(job_config.jobId, job_config)
+        job_state.actor_id = job_actor_id
+
+        self._jobs[job_key] = job_state
+
+    @_actors.Message
+    def get_job_list(self):
+
+        job_list = list(map(self._get_job_info, self._jobs.keys()))
+        self.actors().reply("job_list", job_list)
+
+    @_actors.Message
+    def get_job_details(self, job_key: str, details: bool):
+
+        details = self._get_job_info(job_key, details)
+        self.actors().reply("job_details", details)
 
     @_actors.Message
     def job_succeeded(self, job_key: str, job_result: _cfg.JobResult):
 
         # Ignore duplicate messages from the job processor (can happen in unusual error cases)
-        if job_key not in self._job_actors:
+        if job_key not in self._jobs:
             self._log.warning(f"Ignoring [job_succeeded] message, job [{job_key}] has already completed")
             return
 
         self._log.info(f"Recording job as successful: {job_key}")
 
+        self._jobs[job_key].job_result = job_result
         self._finalize_job(job_key)
 
         if self._notify_callback is not None:
@@ -169,12 +196,13 @@ class TracEngine(_actors.Actor):
     def job_failed(self, job_key: str, error: Exception):
 
         # Ignore duplicate messages from the job processor (can happen in unusual error cases)
-        if job_key not in self._job_actors:
+        if job_key not in self._jobs:
             self._log.warning(f"Ignoring [job_failed] message, job [{job_key}] has already completed")
             return
 
         self._log.error(f"Recording job as failed: {job_key}")
 
+        self._jobs[job_key].job_error = error
         self._finalize_job(job_key)
 
         if self._notify_callback is not None:
@@ -182,10 +210,47 @@ class TracEngine(_actors.Actor):
 
     def _finalize_job(self, job_key: str):
 
-        job_actors = self._job_actors
-        job_actor_id = job_actors.pop(job_key)
-        self.actors().stop(job_actor_id)
-        self._job_actors = job_actors
+        # Stop the actor but keep the job state available for status / results queries
+
+        # In the future, job state will need to be expunged after some period of time
+        # For now each instance of the runtime only processes one job so no need to worry
+
+        job_state = self._jobs.get(job_key)
+        job_actor_id = job_state.actor_id if job_state is not None else None
+
+        if job_actor_id is not None:
+            self.actors().stop(job_actor_id)
+            job_state.actor_id = None
+
+    def _get_job_info(self, job_key: str, details: bool = False) -> tp.Optional[_cfg.JobResult]:
+
+        job_state = self._jobs.get(job_key)
+
+        if job_state is None:
+            return None
+
+        job_result = _cfg.JobResult()
+        job_result.jobId = job_state.job_id
+
+        if job_state.actor_id is not None:
+            job_result.statusCode = _meta.JobStatusCode.RUNNING
+
+        elif job_state.job_result is not None:
+            job_result.statusCode = job_state.job_result.statusCode
+            job_result.statusMessage = job_state.job_result.statusMessage
+            if details:
+                job_result.results = job_state.job_result.results or dict()
+
+        elif job_state.job_error is not None:
+            job_result.statusCode = _meta.JobStatusCode.FAILED
+            job_result.statusMessage = str(job_state.job_error.args[0])
+
+        else:
+            # Alternatively return UNKNOWN status or throw an error here
+            job_result.statusCode = _meta.JobStatusCode.FAILED
+            job_result.statusMessage = "No details available"
+
+        return job_result
 
 
 class JobProcessor(_actors.Actor):
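
The engine now keeps a _JobState per job and answers two new messages, get_job_list and get_job_details, replying with job_list / job_details respectively. A sketch of how another actor (for instance a request handler spawned by the API server) might query it; the ApiRequest class is illustrative, and it assumes the ordinary actor context exposes a send() taking a target id, which is not shown in these hunks:

    import typing as tp
    import tracdap.rt.config as cfg
    import tracdap.rt._exec.actors as actors

    class ApiRequest(actors.Actor):

        def __init__(self, engine_id: actors.ActorId, job_key: str):
            super().__init__()
            self._engine_id = engine_id
            self._job_key = job_key
            self._result: tp.Optional[cfg.JobResult] = None

        def on_start(self):
            # Ask the engine for one job; it replies with a "job_details" message
            self.actors().send(self._engine_id, "get_job_details", self._job_key, True)

        @actors.Message
        def job_details(self, details: tp.Optional[cfg.JobResult]):
            self._result = details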
@@ -218,7 +283,7 @@ class JobProcessor(_actors.Actor):
         self._log.info(f"Cleaning up job [{self.job_key}]")
         self._models.destroy_scope(self.job_key)
 
-    def on_signal(self, signal: Signal) -> tp.Optional[bool]:
+    def on_signal(self, signal: _actors.Signal) -> tp.Optional[bool]:
 
         if signal.message == _actors.SignalNames.FAILED and isinstance(signal, _actors.ErrorSignal):
 

tracdap/rt/_exec/runtime.py

@@ -16,6 +16,7 @@ from __future__ import annotations
 
 import dataclasses as dc
 import datetime as dt
+import signal
 import threading
 
 import sys
@@ -54,6 +55,8 @@ class TracRuntime:
         _engine.ModelNodeProcessor: "model",
         _engine.DataNodeProcessor: "data"}
 
+    __DEFAULT_API_PORT = 9000
+
     def __init__(
             self,
             sys_config: tp.Union[str, pathlib.Path, _cfg.RuntimeConfig],
@@ -61,6 +64,7 @@ class TracRuntime:
             job_result_format: tp.Optional[str] = None,
             scratch_dir: tp.Union[str, pathlib.Path, None] = None,
             scratch_dir_persist: bool = False,
+            plugin_packages: tp.List[str] = None,
             dev_mode: bool = False):
 
         trac_version = _version.__version__
@@ -83,28 +87,34 @@ class TracRuntime:
         self._log.info(f"TRAC D.A.P. Python Runtime {trac_version}")
 
         self._sys_config = sys_config if isinstance(sys_config, _cfg.RuntimeConfig) else None
-        self._sys_config_path = pathlib.Path(sys_config) if not self._sys_config else None
+        self._sys_config_path = sys_config if not self._sys_config else None
         self._job_result_dir = job_result_dir
         self._job_result_format = job_result_format
         self._scratch_dir = scratch_dir
         self._scratch_dir_provided = True if scratch_dir is not None else False
         self._scratch_dir_persist = scratch_dir_persist
+        self._plugin_packages = plugin_packages or []
         self._dev_mode = dev_mode
-        self._server_enabled = False
-        self._server_port = 0
 
+        # Runtime control
+        self._runtime_lock = threading.Lock()
+        self._runtime_event = threading.Condition(self._runtime_lock)
         self._pre_start_complete = False
+        self._shutdown_requested = False
+        self._oneshot_job = None
 
         # Top level resources
+        self._config_mgr: tp.Optional[_cparse.ConfigManager] = None
         self._models: tp.Optional[_models.ModelLoader] = None
        self._storage: tp.Optional[_storage.StorageManager] = None
 
         # The execution engine
         self._system: tp.Optional[_actors.ActorSystem] = None
         self._engine: tp.Optional[_engine.TracEngine] = None
-        self._engine_event = threading.Condition()
 
         # Runtime API server
+        self._server_enabled = False
+        self._server_port = 0
         self._server = None
 
         self._jobs: tp.Dict[str, _RuntimeJobInfo] = dict()
@@ -134,21 +144,28 @@ class TracRuntime:
 
         self._prepare_scratch_dir()
 
-        # Plugin manager and static API impl are singletons
-        # If these methods are called multiple times, the second and subsequent calls are ignored
+        # Plugin manager, static API and guard rails are singletons
+        # Calling these methods multiple times is safe (e.g. for embedded or testing scenarios)
+        # However, plugins are never un-registered for the lifetime of the processes
 
         _plugins.PluginManager.register_core_plugins()
+
+        for plugin_package in self._plugin_packages:
+            _plugins.PluginManager.register_plugin_package(plugin_package)
+
         _static_api.StaticApiImpl.register_impl()
         _guard.PythonGuardRails.protect_dangerous_functions()
 
         # Load sys config (or use embedded), config errors are detected before start()
         # Job config can also be checked before start() by using load_job_config()
 
+        self._config_mgr = _cparse.ConfigManager.for_root_config(self._sys_config_path)
+
         if self._sys_config is None:
             sys_config_dev_mode = _dev_mode.DEV_MODE_SYS_CONFIG if self._dev_mode else None
-            sys_config_parser = _cparse.ConfigParser(_cfg.RuntimeConfig, sys_config_dev_mode)
-            sys_config_raw = sys_config_parser.load_raw_config(self._sys_config_path, config_file_name="system")
-            self._sys_config = sys_config_parser.parse(sys_config_raw, self._sys_config_path)
+            self._sys_config = self._config_mgr.load_root_object(
+                _cfg.RuntimeConfig, sys_config_dev_mode,
+                config_file_name="system")
         else:
             self._log.info("Using embedded system config")
 
@@ -156,8 +173,15 @@ class TracRuntime:
         # I.e. it can be applied to embedded configs
 
         if self._dev_mode:
-            config_dir = self._sys_config_path.parent if self._sys_config_path is not None else None
-            self._sys_config = _dev_mode.DevModeTranslator.translate_sys_config(self._sys_config, config_dir)
+            self._sys_config = _dev_mode.DevModeTranslator.translate_sys_config(self._sys_config, self._config_mgr)
+
+        # Runtime API server is controlled by the sys config
+
+        if self._sys_config.runtimeApi is not None:
+            api_config = self._sys_config.runtimeApi
+            if api_config.enabled:
+                self._server_enabled = True
+                self._server_port = api_config.port or self.__DEFAULT_API_PORT
 
         self._pre_start_complete = True
 
@@ -196,7 +220,7 @@ class TracRuntime:
                 # The server module pulls in all the gRPC dependencies, don't import it unless we have to
                 import tracdap.rt._exec.server as _server
 
-                self._server = _server.RuntimeApiServer(self._server_port)
+                self._server = _server.RuntimeApiServer(self._system, self._server_port)
                 self._server.start()
 
         except Exception as e:
@@ -237,6 +261,28 @@ class TracRuntime:
         else:
             self._log.info("TRAC runtime has gone down cleanly")
 
+    def is_oneshot(self):
+        return not self._server_enabled
+
+    def run_until_done(self):
+
+        if self._server_enabled == False and len(self._jobs) == 0:
+            self._log.error("No job config supplied, TRAC runtime will not run")
+            raise _ex.EStartup("No job config supplied")
+
+        signal.signal(signal.SIGTERM, self._request_shutdown)
+        signal.signal(signal.SIGINT, self._request_shutdown)
+
+        with self._runtime_lock:
+            while not self._shutdown_requested:
+                self._runtime_event.wait()
+
+    def _request_shutdown(self, _signum = None, _frame = None):
+
+        with self._runtime_lock:
+            self._shutdown_requested = True
+            self._runtime_event.notify()
+
     def _prepare_scratch_dir(self):
 
         if not self._scratch_dir_provided:
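
With these methods the runtime can run as a long-lived service: run_until_done() installs SIGTERM / SIGINT handlers and blocks on the runtime condition variable until _request_shutdown() fires, while is_oneshot() reports whether the embedded API server is disabled (the single-job batch case). A rough sketch of how launch code might choose between the two modes; the paths and plugin package name are placeholders and the elided start-up calls are not shown in this diff:

    from tracdap.rt._exec.runtime import TracRuntime

    runtime = TracRuntime(
        "config/sys_config.yaml",          # placeholder path
        plugin_packages=["my_plugins"],    # new in 0.6.3
        dev_mode=True)

    ...  # pre-start / start sequence as in previous versions

    if runtime.is_oneshot():
        ...  # load and submit a single job, then wait for its result
    else:
        runtime.run_until_done()  # blocks until SIGTERM / SIGINT requests shutdown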
@@ -274,20 +320,18 @@ class TracRuntime:
 
         if isinstance(job_config, _cfg.JobConfig):
             self._log.info("Using embedded job config")
-            job_config_path = None
 
         else:
-            job_config_path = job_config
             job_config_dev_mode = _dev_mode.DEV_MODE_JOB_CONFIG if self._dev_mode else None
-            job_config_parser = _cparse.ConfigParser(_cfg.JobConfig, job_config_dev_mode)
-            job_config_raw = job_config_parser.load_raw_config(job_config_path, config_file_name="job")
-            job_config = job_config_parser.parse(job_config_raw, job_config_path)
+            job_config = self._config_mgr.load_config_object(
+                job_config, _cfg.JobConfig,
+                job_config_dev_mode,
+                config_file_name="job")
 
         if self._dev_mode:
-            config_dir = job_config_path.parent if job_config_path is not None else None
             job_config = _dev_mode.DevModeTranslator.translate_job_config(
                 self._sys_config, job_config,
-                self._scratch_dir, config_dir,
+                self._scratch_dir, self._config_mgr,
                 model_class)
 
         return job_config
@@ -297,7 +341,7 @@ class TracRuntime:
         job_key = _util.object_key(job_config.jobId)
         self._jobs[job_key] = _RuntimeJobInfo()
 
-        self._system.send(
+        self._system.send_main(
             "submit_job", job_config,
             str(self._job_result_dir) if self._job_result_dir else "",
             self._job_result_format if self._job_result_format else "")
@@ -309,35 +353,34 @@ class TracRuntime:
 
         if job_key not in self._jobs:
             raise _ex.ETracInternal(f"Attempt to wait for a job that was never started")
 
-        with self._engine_event:
-            while True:
+        self._oneshot_job = job_key
 
-                job_info = self._jobs[job_key]
+        self.run_until_done()
 
-                if job_info.error is not None:
-                    raise job_info.error
+        job_info = self._jobs[job_key]
 
-                if job_info.result is not None:
-                    return job_info.result
+        if job_info.error is not None:
+            raise job_info.error
 
-                # TODO: Timeout / heartbeat
+        elif job_info.result is not None:
+            return job_info.result
 
-                self._engine_event.wait(1)
+        else:
+            err = f"No result or error information is available for job [{job_key}]"
+            self._log.error(err)
+            raise _ex.ETracInternal(err)
 
     def _engine_callback(self, job_key, job_result, job_error):
 
-        with self._engine_event:
-
-            if job_result is not None:
-                self._jobs[job_key].done = True
-                self._jobs[job_key].result = job_result
-            elif job_error is not None:
-                self._jobs[job_key].done = True
-                self._jobs[job_key].error = job_error
-            else:
-                pass
+        if job_result is not None:
+            self._jobs[job_key].done = True
+            self._jobs[job_key].result = job_result
+        elif job_error is not None:
+            self._jobs[job_key].done = True
+            self._jobs[job_key].error = job_error
 
-            self._engine_event.notify()
+        if self._oneshot_job == job_key:
+            self._request_shutdown()
 
     # ------------------------------------------------------------------------------------------------------------------
     # Error handling