tracdap-runtime 0.9.0b1__py3-none-any.whl → 0.9.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. tracdap/rt/_impl/core/config_parser.py +29 -3
  2. tracdap/rt/_impl/core/data.py +93 -51
  3. tracdap/rt/_impl/core/repos.py +15 -13
  4. tracdap/rt/_impl/core/storage.py +17 -12
  5. tracdap/rt/_impl/core/struct.py +254 -60
  6. tracdap/rt/_impl/core/util.py +94 -23
  7. tracdap/rt/_impl/exec/context.py +35 -8
  8. tracdap/rt/_impl/exec/dev_mode.py +60 -40
  9. tracdap/rt/_impl/exec/engine.py +44 -50
  10. tracdap/rt/_impl/exec/functions.py +12 -8
  11. tracdap/rt/_impl/exec/graph.py +3 -3
  12. tracdap/rt/_impl/exec/graph_builder.py +22 -5
  13. tracdap/rt/_impl/grpc/codec.py +4 -11
  14. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +36 -34
  15. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
  16. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +64 -64
  17. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +22 -18
  18. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
  19. tracdap/rt/_impl/runtime.py +2 -16
  20. tracdap/rt/_impl/static_api.py +5 -6
  21. tracdap/rt/_plugins/format_csv.py +2 -2
  22. tracdap/rt/_plugins/storage_aws.py +165 -150
  23. tracdap/rt/_plugins/storage_azure.py +17 -11
  24. tracdap/rt/_plugins/storage_gcp.py +35 -18
  25. tracdap/rt/_version.py +1 -1
  26. tracdap/rt/api/model_api.py +45 -0
  27. tracdap/rt/config/__init__.py +8 -10
  28. tracdap/rt/config/common.py +0 -16
  29. tracdap/rt/config/job.py +4 -0
  30. tracdap/rt/config/platform.py +9 -32
  31. tracdap/rt/config/runtime.py +4 -11
  32. tracdap/rt/config/tenant.py +28 -0
  33. tracdap/rt/launch/cli.py +0 -8
  34. tracdap/rt/launch/launch.py +1 -3
  35. tracdap/rt/metadata/__init__.py +18 -19
  36. tracdap/rt/metadata/data.py +19 -31
  37. tracdap/rt/metadata/job.py +1 -1
  38. tracdap/rt/metadata/type.py +9 -5
  39. {tracdap_runtime-0.9.0b1.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +3 -3
  40. {tracdap_runtime-0.9.0b1.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +43 -42
  41. {tracdap_runtime-0.9.0b1.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
  42. {tracdap_runtime-0.9.0b1.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
  43. {tracdap_runtime-0.9.0b1.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
tracdap/rt/_impl/exec/dev_mode.py
@@ -58,11 +58,6 @@ class DevModeTranslator:
 
         cls._log.info(f"Applying dev mode config translation to system config")
 
-        # TODO: In code gen, default object types to a new object unless the field is marked as optional
-        # This would match the general semantics of protobuf
-        if sys_config.storage is None:
-            sys_config.storage = _cfg.StorageConfig()
-
         sys_config = cls._add_integrated_repo(sys_config)
         sys_config = cls._process_storage(sys_config, config_mgr)
 
@@ -73,11 +68,12 @@ class DevModeTranslator:
 
         # Add the integrated model repo trac_integrated
 
-        integrated_repo_config = _cfg.PluginConfig(
+        integrated_repo_config = _meta.ResourceDefinition(
+            resourceType=_meta.ResourceType.MODEL_REPOSITORY,
             protocol="integrated",
             properties={})
 
-        sys_config.repositories["trac_integrated"] = integrated_repo_config
+        sys_config.resources["trac_integrated"] = integrated_repo_config
 
         return sys_config
 
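The same change, restated as standalone code for clarity. This is a minimal sketch based only on the lines above, assuming the config objects can be default-constructed; model repositories are now registered as typed ResourceDefinition entries under sys_config.resources rather than PluginConfig entries under sys_config.repositories:

import tracdap.rt.config as _cfg
import tracdap.rt.metadata as _meta

# 0.9.0b1: repo = _cfg.PluginConfig(protocol="integrated", properties={})
#          sys_config.repositories["trac_integrated"] = repo

# 0.9.0b2: repositories become resources with an explicit resource type
sys_config = _cfg.RuntimeConfig()   # assumes default construction is allowed
repo = _meta.ResourceDefinition(
    resourceType=_meta.ResourceType.MODEL_REPOSITORY,
    protocol="integrated",
    properties={})
sys_config.resources["trac_integrated"] = repo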
@@ -86,24 +82,17 @@ class DevModeTranslator:
             cls, sys_config: _cfg.RuntimeConfig,
             config_mgr: _cfg_p.ConfigManager):
 
-        storage_config = copy.deepcopy(sys_config.storage)
-        storage_config.defaultLayout = _meta.StorageLayout.DEVELOPER_LAYOUT
-
-        for bucket_key, bucket_config in storage_config.buckets.items():
-            storage_config.buckets[bucket_key] = cls._resolve_storage_location(
-                bucket_key, bucket_config, config_mgr)
-
-        for bucket_key, bucket_config in storage_config.external.items():
-            storage_config.external[bucket_key] = cls._resolve_storage_location(
-                bucket_key, bucket_config, config_mgr)
+        sys_config.properties[_cfg_p.ConfigKeys.STORAGE_DEFAULT_LAYOUT] = _meta.StorageLayout.DEVELOPER_LAYOUT.name
 
-        sys_config = copy.copy(sys_config)
-        sys_config.storage = storage_config
+        for resource_key, resource in sys_config.resources.items():
+            if resource.resourceType in [_meta.ResourceType.INTERNAL_STORAGE, _meta.ResourceType.EXTERNAL_STORAGE]:
+                sys_config.resources[resource_key] = cls._resolve_storage_location(
+                    resource_key, resource, config_mgr)
 
         return sys_config
 
     @classmethod
-    def _resolve_storage_location(cls, bucket_key, bucket_config, config_mgr: _cfg_p.ConfigManager):
+    def _resolve_storage_location(cls, bucket_key, bucket_config: _meta.ResourceDefinition, config_mgr: _cfg_p.ConfigManager):
 
         if bucket_config.protocol != "LOCAL":
             return bucket_config
@@ -167,6 +156,9 @@ class DevModeTranslator:
             job_config, job_def = self.translate_job_def(job_config, job_def, model_class)
             job_config.job = job_def
 
+            # Include some basic tags and attributes in the generated metadata
+            job_config = self.generate_dev_mode_tags(job_config)
+
             return job_config
 
         finally:
@@ -225,6 +217,8 @@ class DevModeTranslator:
             -> _cfg.JobConfig:
 
         obj_key = _util.object_key(obj_id)
+
+        job_config.objectMapping[obj_key] = obj_id
         job_config.objects[obj_key] = obj
 
         return job_config
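Dev-mode translation now records the generated object ID alongside the object definition. An illustration of how the two maps are assumed to relate, using only the helpers visible in this diff:

# Illustration only: both maps are keyed by _util.object_key(obj_id), so a
# selector can later be resolved either to a TagHeader (objectMapping) or to
# the full ObjectDefinition (objects).
obj_id = _util.new_object_id(_meta.ObjectType.DATA)                 # TagHeader
obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.DATA)      # definition

obj_key = _util.object_key(obj_id)
job_config.objectMapping[obj_key] = obj_id
job_config.objects[obj_key] = obj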
@@ -799,7 +793,6 @@ class DevModeTranslator:
         else:
             return job_config, job_def
 
-        job_metadata = job_config.objects
         job_inputs = job_detail.inputs
         job_outputs = job_detail.outputs
         job_prior_outputs = job_detail.priorOutputs
@@ -810,7 +803,7 @@ class DevModeTranslator:
                 raise _ex.EJobValidation(f"Missing required input [{key}]")
                 continue
             supplied_input = job_inputs.pop(key) if key in job_inputs else None
-            input_selector = self._process_socket(key, schema, supplied_input, job_metadata, is_output=False)
+            input_selector = self._process_socket(key, schema, supplied_input, job_config, is_output=False)
             if input_selector is not None:
                 job_inputs[key] = input_selector
 
@@ -818,28 +811,28 @@ class DevModeTranslator:
             if key not in job_outputs:
                 raise _ex.EJobValidation(f"Missing required output [{key}]")
             supplied_output = job_outputs.pop(key)
-            output_selector = self._process_socket(key, schema, supplied_output, job_metadata, is_output=True)
+            output_selector = self._process_socket(key, schema, supplied_output, job_config, is_output=True)
             if output_selector is not None:
                 job_prior_outputs[key] = output_selector
 
         return job_config, job_def
 
-    def _process_socket(self, key, socket, supplied_value, job_metadata, is_output) -> _meta.TagSelector:
+    def _process_socket(self, key, socket, supplied_value, job_config, is_output) -> _meta.TagSelector:
 
         if socket.objectType == _meta.ObjectType.DATA:
             schema = socket.schema if socket and not socket.dynamic else None
-            return self._process_data_socket(key, supplied_value, schema, job_metadata, is_output)
+            return self._process_data_socket(key, supplied_value, schema, job_config, is_output)
 
         elif socket.objectType == _meta.ObjectType.FILE:
             file_type = socket.fileType
-            return self._process_file_socket(key, supplied_value, file_type, job_metadata, is_output)
+            return self._process_file_socket(key, supplied_value, file_type, job_config, is_output)
 
         else:
             raise _ex.EUnexpected()
 
     def _process_data_socket(
             self, data_key, data_value, schema: tp.Optional[_meta.SchemaDefinition],
-            job_metadata: tp.Dict[str, _meta.ObjectDefinition], is_output: bool)\
+            job_config: _cfg.JobConfig, is_output: bool)\
             -> _meta.TagSelector:
 
         data_id = _util.new_object_id(_meta.ObjectType.DATA)
@@ -847,8 +840,8 @@ class DevModeTranslator:
 
         if isinstance(data_value, str):
             storage_path = data_value
-            storage_key = self._sys_config.storage.defaultBucket
-            storage_format = self.infer_format(storage_path, self._sys_config.storage, schema)
+            storage_key = _util.read_property(self._sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)
+            storage_format = self.infer_format(storage_path, self._sys_config, schema)
 
         elif isinstance(data_value, dict):
 
@@ -857,8 +850,8 @@ class DevModeTranslator:
             if not storage_path:
                 raise _ex.EConfigParse(f"Invalid configuration for input [{data_key}] (missing required value 'path'")
 
-            storage_key = data_value.get("storageKey") or self._sys_config.storage.defaultBucket
-            storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config.storage, schema)
+            storage_key = data_value.get("storageKey") or _util.read_property(self._sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)
+            storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config, schema)
 
         else:
             raise _ex.EConfigParse(f"Invalid configuration for input '{data_key}'")
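With the StorageConfig section gone, storage defaults are now read from sys_config.properties through _util.read_property. The calls below are taken directly from the hunks in this diff; the underlying property key strings behind ConfigKeys are not shown here and remain internal:

# Default storage location and format, as dev mode now resolves them
storage_key = _util.read_property(
    sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)

storage_format = _util.read_property(
    sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_FORMAT, "CSV")

# Dev mode also forces the developer storage layout by writing a property
sys_config.properties[_cfg_p.ConfigKeys.STORAGE_DEFAULT_LAYOUT] = \
    _meta.StorageLayout.DEVELOPER_LAYOUT.name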
@@ -895,14 +888,14 @@ class DevModeTranslator:
             storage_id, storage_key, storage_path, storage_format,
             data_item, incarnation_index)
 
-        job_metadata[_util.object_key(data_id)] = data_obj
-        job_metadata[_util.object_key(storage_id)] = storage_obj
+        self._add_job_metadata(job_config, data_id, data_obj)
+        self._add_job_metadata(job_config, storage_id, storage_obj)
 
         return _util.selector_for(data_id)
 
     def _process_file_socket(
             self, file_key, file_value, file_type: _meta.FileType,
-            job_metadata: tp.Dict[str, _meta.ObjectDefinition], is_output: bool) \
+            job_config: _cfg.JobConfig, is_output: bool) \
             -> tp.Optional[_meta.TagSelector]:
 
         file_id = _util.new_object_id(_meta.ObjectType.FILE)
@@ -910,12 +903,12 @@ class DevModeTranslator:
 
         if isinstance(file_value, str):
 
-            storage_key = self._sys_config.storage.defaultBucket
+            storage_key = _util.read_property(self._sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)
             storage_path = file_value
 
         elif isinstance(file_value, dict):
 
-            storage_key = file_value.get("storageKey") or self._sys_config.storage.defaultBucket
+            storage_key = file_value.get("storageKey") or _util.read_property(self._sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)
             storage_path = file_value.get("path")
 
             if not storage_path:
@@ -956,13 +949,13 @@ class DevModeTranslator:
             storage_id, storage_key, storage_path, storage_format,
             data_item, incarnation_index=0)
 
-        job_metadata[_util.object_key(file_id)] = file_obj
-        job_metadata[_util.object_key(storage_id)] = storage_obj
+        self._add_job_metadata(job_config, file_id, file_obj)
+        self._add_job_metadata(job_config, storage_id, storage_obj)
 
         return _util.selector_for(file_id)
 
     @staticmethod
-    def infer_format(storage_path: str, storage_config: _cfg.StorageConfig, schema: tp.Optional[_meta.SchemaDefinition]):
+    def infer_format(storage_path: str, sys_config: _cfg.RuntimeConfig, schema: tp.Optional[_meta.SchemaDefinition]):
 
         schema_type = schema.schemaType if schema and schema.schemaType else _meta.SchemaType.TABLE
 
@@ -976,7 +969,7 @@ class DevModeTranslator:
             return extension[1:] if extension.startswith(".") else extension
 
         else:
-            return storage_config.defaultFormat
+            return _util.read_property(sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_FORMAT, "CSV")
 
     def _find_latest_version(self, storage_key, storage_path):
 
@@ -1071,6 +1064,33 @@ class DevModeTranslator:
 
         return _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
 
+    @classmethod
+    def generate_dev_mode_tags(cls, job_config: _cfg.JobConfig) -> _cfg.JobConfig:
+
+        job_config = copy.copy(job_config)
+
+        for key, object_id in job_config.objectMapping.items():
+            if key not in job_config.tags:
+
+                tag = _meta.Tag(header=object_id)
+
+                timestamp = _types.MetadataCodec.convert_datetime_value(object_id.objectTimestamp.isoDatetime)
+                user_id = _types.MetadataCodec.encode_value("local_user")
+                user_name = _types.MetadataCodec.encode_value("Local User")
+
+                tag.attrs["trac_dev_mode"] = _types.MetadataCodec.encode_value(True)
+
+                tag.attrs["trac_create_time"] = timestamp
+                tag.attrs["trac_create_user_id"] = user_id
+                tag.attrs["trac_create_user_name"] = user_name
+
+                tag.attrs["trac_update_time"] = timestamp
+                tag.attrs["trac_update_user_id"] = user_id
+                tag.attrs["trac_update_user_name"] = user_name
+
+                job_config.tags[key] = tag
+
+        return job_config
 
 
 DevModeTranslator._log = _logging.logger_for_class(DevModeTranslator)
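generate_dev_mode_tags gives every generated object a minimal Tag carrying a trac_dev_mode marker plus create/update attributes for a local user. A hedged sketch of reading those attributes back; decode_value mirrors the MetadataCodec usage that appears later in this diff, and the decoded types are assumptions:

# Sketch only: inspect the tags produced by dev-mode translation
for key, tag in job_config.tags.items():
    dev_mode_attr = tag.attrs.get("trac_dev_mode")
    if dev_mode_attr is not None and _types.MetadataCodec.decode_value(dev_mode_attr):
        create_time = _types.MetadataCodec.decode_value(tag.attrs["trac_create_time"])
        print(f"{key} generated by dev mode at {create_time}")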
tracdap/rt/_impl/exec/engine.py
@@ -17,7 +17,6 @@ import copy as cp
 import dataclasses as dc
 import enum
 import io
-import pathlib
 import typing as tp
 
 import tracdap.rt.metadata as _meta
@@ -83,14 +82,6 @@ class _EngineContext:
             pending_nodes, active_nodes, succeeded_nodes, failed_nodes)
 
 
-@dc.dataclass
-class _JobResultSpec:
-
-    save_result: bool = False
-    result_dir: tp.Union[str, pathlib.Path] = None
-    result_format: str = None
-
-
 @dc.dataclass
 class _JobLog:
 
@@ -117,19 +108,16 @@ class _JobLog:
 class _JobState:
 
     job_id: _meta.TagHeader
+    job_config: _cfg.JobConfig = None
+    parent_key: str = None
 
     actor_id: _actors.ActorId = None
     monitors: tp.List[_actors.ActorId] = dc.field(default_factory=list)
+    job_log: _JobLog = None
 
-    job_config: _cfg.JobConfig = None
     job_result: _cfg.JobResult = None
     job_error: Exception = None
 
-    parent_key: str = None
-    result_spec: _JobResultSpec = None
-
-    job_log: _JobLog = None
-
 
 class TracEngine(_actors.Actor):
@@ -197,25 +185,25 @@ class TracEngine(_actors.Actor):
         return super().on_signal(signal)
 
     @_actors.Message
-    def submit_job(
-            self, job_config: _cfg.JobConfig,
-            job_result_dir: str,
-            job_result_format: str):
+    def submit_job(self, job_config: _cfg.JobConfig):
 
         job_key = _util.object_key(job_config.jobId)
 
         self._log.info(f"Received a new job: [{job_key}]")
 
-        result_needed = bool(job_result_dir)
-        result_spec = _JobResultSpec(result_needed, job_result_dir, job_result_format)
-        job_log = _JobLog(log_file_needed=result_needed)
-
         job_state = _JobState(job_config.jobId)
-        job_state.job_log = job_log
+        job_state.job_config = job_config
+
+        job_logs_enabled = _util.read_property(
+            job_config.properties,
+            _cfg_p.ConfigKeys.RESULT_LOGS_ENABLED,
+            False, bool)
+
+        job_log = _JobLog(log_file_needed=job_logs_enabled)
 
         job_processor = JobProcessor(
             self._sys_config, self._models, self._storage,
-            job_key, job_config, graph_spec=None, job_log=job_state.job_log)
+            job_key, job_config, graph_spec=None, job_log=job_log)
 
         job_actor_id = self.actors().spawn(job_processor)
 
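For callers, the result directory and format arguments disappear from submit_job and move onto the job config as properties. A hypothetical caller sketch: the ConfigKeys names come from this diff, but the string values used for booleans and the direct method call (rather than delivery through the actor system) are simplifications:

# Result handling is now driven by job_config.properties
job_config.properties[_cfg_p.ConfigKeys.RESULT_ENABLED] = "true"
job_config.properties[_cfg_p.ConfigKeys.RESULT_LOGS_ENABLED] = "true"
job_config.properties[_cfg_p.ConfigKeys.RESULT_STORAGE_LOCATION] = "result_storage"
job_config.properties[_cfg_p.ConfigKeys.RESULT_STORAGE_PATH] = f"job_result_{job_key}.json"
job_config.properties[_cfg_p.ConfigKeys.RESULT_FORMAT] = "JSON"

# 0.9.0b1: engine.submit_job(job_config, job_result_dir, job_result_format)
# 0.9.0b2:
engine.submit_job(job_config)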
@@ -226,8 +214,7 @@ class TracEngine(_actors.Actor):
 
         job_state.actor_id = job_actor_id
         job_state.monitors.append(job_monitor_id)
-        job_state.job_config = job_config
-        job_state.result_spec = result_spec
+        job_state.job_log = job_log
 
         self._jobs[job_key] = job_state
 
@@ -245,6 +232,10 @@ class TracEngine(_actors.Actor):
 
         self._log.info(f"Received a child job: [{child_key}] for parent [{parent_key}]")
 
+        # Copy job config properties from parent job
+        child_config = _cfg.JobConfig()
+        child_config.properties.update(parent_state.job_config.properties)
+
         child_job_log = _JobLog(parent_state.job_log)
 
         child_processor = JobProcessor(
@@ -254,11 +245,11 @@ class TracEngine(_actors.Actor):
         child_actor_id = self.actors().spawn(child_processor)
 
         child_state = _JobState(child_id)
-        child_state.job_log = child_job_log
+        child_state.job_config = child_config
+        child_state.parent_key = parent_key
         child_state.actor_id = child_actor_id
         child_state.monitors.append(monitor_id)
-        child_state.parent_key = parent_key
-        child_state.result_spec = _JobResultSpec(False)  # Do not output separate results for child jobs
+        child_state.job_log = child_job_log
 
         self._jobs[child_key] = child_state
 
@@ -332,8 +323,13 @@ class TracEngine(_actors.Actor):
 
         job_state = self._jobs.get(job_key)
 
-        # Record output metadata if required (not needed for local runs or when using API server)
-        if job_state.parent_key is None and job_state.result_spec.save_result:
+        result_enabled = _util.read_property(
+            job_state.job_config.properties,
+            _cfg_p.ConfigKeys.RESULT_ENABLED,
+            False, bool)
+
+        # Record output metadata if required
+        if result_enabled and job_state.parent_key is None:
             self._save_job_result(job_key, job_state)
 
         # Stop any monitors that were created directly by the engine
@@ -352,18 +348,13 @@ class TracEngine(_actors.Actor):
 
         self._log.info(f"Saving job result for [{job_key}]")
 
-        # It might be better abstract reporting of results, job status etc., perhaps with a job monitor
-
-        if job_state.result_spec.save_result:
+        storage_key = _util.read_property(job_state.job_config.properties, _cfg_p.ConfigKeys.RESULT_STORAGE_LOCATION)
+        storage_path = _util.read_property(job_state.job_config.properties, _cfg_p.ConfigKeys.RESULT_STORAGE_PATH)
+        result_format = _util.read_property(job_state.job_config.properties, _cfg_p.ConfigKeys.RESULT_FORMAT, "JSON")
+        result_content = _cfg_p.ConfigQuoter.quote(job_state.job_result, result_format)
 
-            result_format = job_state.result_spec.result_format
-            result_dir = job_state.result_spec.result_dir
-            result_file = f"job_result_{job_key}.{result_format}"
-            result_path = pathlib.Path(result_dir).joinpath(result_file)
-
-            with open(result_path, "xt") as result_stream:
-                result_content = _cfg_p.ConfigQuoter.quote(job_state.job_result, result_format)
-                result_stream.write(result_content)
+        storage = self._storage.get_file_storage(storage_key)
+        storage.write_bytes(storage_path, result_content.encode('utf-8'))
 
     def _get_job_info(self, job_key: str, details: bool = False) -> tp.Optional[_cfg.JobResult]:
 
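_util.read_property does the heavy lifting in these engine hunks, but util.py is not expanded in this diff, so its exact behaviour is an assumption. Inferred from the call sites alone, a plausible sketch is: look the key up in a string-valued properties map, fall back to the default, and coerce to the requested type:

import typing as tp

def read_property(properties: tp.Dict[str, str], key: str,
                  default: tp.Any = None, convert: type = str) -> tp.Any:

    # Illustrative only - the real helper lives in tracdap.rt._impl.core.util
    raw_value = properties.get(key)

    if raw_value is None:
        return default

    if convert is bool:
        return str(raw_value).strip().lower() in ("true", "yes", "1")

    return convert(raw_value)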
@@ -455,7 +446,7 @@ class JobProcessor(_actors.Actor):
         self._models = models
         self._storage = storage
 
-        self._job_log = job_log or _JobLog()
+        self._job_log = job_log if job_log is not None else _JobLog()
         self._log_provider = self._job_log.log_provider
         self._log = self._job_log.log_provider.logger_for_object(self)
 
@@ -532,7 +523,8 @@ class JobProcessor(_actors.Actor):
         # This will be the last message in the job log file
         self._log.info(f"Job succeeded [{self.job_key}]")
 
-        self._save_job_log_file(job_result)
+        if self._job_log.log_file_needed:
+            self._save_job_log_file(job_result)
 
         self.actors().stop(self.actors().sender)
         self.actors().send_parent("job_succeeded", self.job_key, job_result)
@@ -556,7 +548,8 @@ class JobProcessor(_actors.Actor):
         result_def.statusMessage = str(error)
         job_result = _cfg.JobResult(job_id, result_id, result_def)
 
-        self._save_job_log_file(job_result)
+        if self._job_log.log_file_needed:
+            self._save_job_log_file(job_result)
 
         self.actors().send_parent("job_failed", self.job_key, error, job_result)
 
@@ -567,10 +560,9 @@ class JobProcessor(_actors.Actor):
 
     def _save_job_log_file(self, job_result: _cfg.JobResult):
 
-        # Do not save log files for child jobs, or if a log is not available
+        # Do not fail the job if log content is not available
         if self._job_log.log_buffer is None:
-            if self._job_log.log_file_needed:
-                self._log.warning(f"Job log not available for [{self.job_key}]")
+            self._log.warning(f"Job log not available for [{self.job_key}]")
             return
 
         # Saving log files could go into a separate actor
@@ -578,13 +570,15 @@ class JobProcessor(_actors.Actor):
         file_id = self._allocate_id(_meta.ObjectType.FILE)
         storage_id = self._allocate_id(_meta.ObjectType.STORAGE)
 
+        self._log.info(f"Saving job log [{_util.object_key(file_id)}]")
+
         file_name = "trac_job_log_file"
         file_type = _meta.FileType("TXT", "text/plain")
 
         file_spec = _data.build_file_spec(
             file_id, storage_id,
             file_name, file_type,
-            self._sys_config.storage)
+            self._sys_config)
 
         file_def = file_spec.definition
         storage_def = file_spec.storage
tracdap/rt/_impl/exec/functions.py
@@ -231,7 +231,7 @@ class DataSpecFunc(NodeFunction[_data.DataSpec]):
         return _data.build_data_spec(
             self.node.data_obj_id, self.node.storage_obj_id,
             self.node.context_key, trac_schema,
-            self.node.storage_config,
+            self.node.sys_config,
             self.node.prior_data_spec)
 
 
@@ -256,7 +256,7 @@ class DataViewFunc(NodeFunction[_data.DataView]):
 
         # TODO: Generalize processing across DataView / DataItem types
 
-        if root_item.schema_type == _meta.SchemaType.STRUCT:
+        if root_item.schema_type == _meta.SchemaType.STRUCT_SCHEMA:
             view = _data.DataView.for_trac_schema(self.node.schema)
             view.parts[root_part_key] = [root_item]
             return view
@@ -269,7 +269,9 @@ class DataViewFunc(NodeFunction[_data.DataView]):
         arrow_schema = root_item.schema
         data_view = _data.DataView.for_arrow_schema(arrow_schema)
 
-        data_view = _data.DataMapping.add_item_to_view(data_view, root_part_key, root_item)
+        data_view = _data.DataMapping \
+            .add_item_to_view(data_view, root_part_key, root_item) \
+            .with_metadata(root_item.metadata)
 
         return data_view
 
@@ -359,10 +361,10 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):
         if data_spec.object_type == _api.ObjectType.FILE:
             return self._load_file(data_copy)
 
-        elif data_spec.schema_type == _api.SchemaType.TABLE:
+        elif data_spec.schema_type == _api.SchemaType.TABLE_SCHEMA:
             return self._load_table(data_spec, data_copy)
 
-        elif data_spec.schema_type == _api.SchemaType.STRUCT:
+        elif data_spec.schema_type == _api.SchemaType.STRUCT_SCHEMA:
             return self._load_struct(data_copy)
 
         # TODO: Handle dynamic inputs, they should work for any schema type
@@ -394,7 +396,9 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):
             data_copy.storagePath, data_copy.storageFormat, arrow_schema,
             storage_options=storage_options)
 
-        return _data.DataItem.for_table(table, table.schema, trac_schema)
+        return _data.DataItem \
+            .for_table(table, table.schema, trac_schema) \
+            .with_metadata(data_spec.metadata)
 
     def _load_struct(self, data_copy):
 
@@ -429,10 +433,10 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
         if data_item.object_type == _api.ObjectType.FILE:
             return self._save_file(data_item, data_spec, data_copy)
 
-        elif data_item.schema_type == _api.SchemaType.TABLE:
+        elif data_item.schema_type == _api.SchemaType.TABLE_SCHEMA:
             return self._save_table(data_item, data_spec, data_copy)
 
-        elif data_item.schema_type == _api.SchemaType.STRUCT:
+        elif data_item.schema_type == _api.SchemaType.STRUCT_SCHEMA:
             return self._save_struct(data_item, data_spec, data_copy)
 
         else:
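Both LoadDataFunc and SaveDataFunc now match on the TABLE_SCHEMA and STRUCT_SCHEMA values of the SchemaType enum (the metadata/type.py and type_pb2 entries in the file list change accordingly). A minimal sketch of the corresponding update in code that branches on schema type; whether the old TABLE and STRUCT names remain available as aliases is not shown in this diff:

import tracdap.rt.metadata as meta

def describe(schema_type: meta.SchemaType) -> str:
    if schema_type == meta.SchemaType.TABLE_SCHEMA:     # was meta.SchemaType.TABLE
        return "tabular data"
    if schema_type == meta.SchemaType.STRUCT_SCHEMA:    # was meta.SchemaType.STRUCT
        return "structured object"
    return "other"

print(describe(meta.SchemaType.TABLE_SCHEMA))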
tracdap/rt/_impl/exec/graph.py
@@ -47,7 +47,7 @@ class NodeNamespace:
     def __repr__(self):
         return repr(self.components())
 
-    def components(self) -> [str]:
+    def components(self) -> _tp.List[str]:
         if self == self.__ROOT:
             return ["ROOT"]
         elif self.parent is self.__ROOT or self.parent is None:
@@ -161,7 +161,7 @@ class GraphContext:
     job_namespace: NodeNamespace
     ctx_namespace: NodeNamespace
 
-    storage_config: _cfg.StorageConfig
+    sys_config: _cfg.RuntimeConfig
 
 
 @_dc.dataclass(frozen=True)
@@ -297,7 +297,7 @@ class DataSpecNode(Node[_data.DataSpec]):
     storage_obj_id: _meta.TagHeader
     context_key: str
 
-    storage_config: _cfg.StorageConfig
+    sys_config: _cfg.RuntimeConfig
 
     prior_data_spec: _tp.Optional[_data.DataSpec]
 
tracdap/rt/_impl/exec/graph_builder.py
@@ -20,7 +20,9 @@ import tracdap.rt.metadata as _meta
 import tracdap.rt.config as _cfg
 import tracdap.rt.exceptions as _ex
 import tracdap.rt._impl.core.data as _data
+import tracdap.rt._impl.core.type_system as _type_system
 import tracdap.rt._impl.core.util as _util
+import tracdap.rt.api as _api
 
 from .graph import *
 
@@ -40,7 +42,7 @@ class GraphBuilder:
     @classmethod
     def dynamic(cls, context: GraphContext) -> "GraphBuilder":
 
-        sys_config = _cfg.RuntimeConfig(storage=context.storage_config)
+        sys_config = context.sys_config
         job_config = _cfg.JobConfig(context.job_id)
 
         return GraphBuilder(sys_config, job_config)
@@ -484,6 +486,7 @@ class GraphBuilder:
     def _build_data_input(self, input_name, input_selector, nodes, outputs, explicit_deps):
 
         data_spec = self._build_data_spec(input_selector)
+        data_spec = self._attach_metadata(data_spec, input_selector)
 
         # Physical load of data items from disk
         # Currently one item per input, since inputs are single part/delta
@@ -523,7 +526,7 @@ class GraphBuilder:
         nodes[data_spec_id] = DataSpecNode(
             data_spec_id, data_view_id,
             data_id, storage_id, output_name,
-            self._sys_config.storage,
+            self._sys_config,
             prior_data_spec=prior_spec,
             explicit_deps=explicit_deps)
 
@@ -536,7 +539,7 @@ class GraphBuilder:
         data_spec = _data.build_data_spec(
             data_id, storage_id, output_name,
             output_schema.schema,
-            self._sys_config.storage,
+            self._sys_config,
             prior_spec=prior_spec)
 
         # Save operation uses the statically produced schema info
@@ -567,6 +570,7 @@ class GraphBuilder:
     def _build_file_input(self, input_name, input_selector, nodes, outputs, explicit_deps):
 
         file_spec = self._build_file_spec(input_selector)
+        file_spec = self._attach_metadata(file_spec, input_selector)
 
         file_load_id = NodeId.of(f"{input_name}:LOAD", self._job_namespace, _data.DataItem)
         nodes[file_load_id] = LoadDataNode(file_load_id, spec=file_spec, explicit_deps=explicit_deps)
@@ -597,7 +601,7 @@ class GraphBuilder:
         file_spec = _data.build_file_spec(
             file_id, storage_id,
             output_name, output_schema.fileType,
-            self._sys_config.storage,
+            self._sys_config,
             prior_spec=prior_spec)
 
         # Graph node for the save operation
@@ -709,7 +713,7 @@ class GraphBuilder:
         context = GraphContext(
             self._job_config.jobId,
             self._job_namespace, namespace,
-            self._sys_config.storage)
+            self._sys_config)
 
         model_node = RunModelNode(
             model_id, model_def, model_scope,
@@ -1028,6 +1032,19 @@ class GraphBuilder:
         else:
             return _util.new_object_id(object_type)
 
+    def _attach_metadata(self, obj: _tp.Any, selector: _meta.TagSelector):
+
+        item_id = _util.get_job_mapping(selector, self._job_config)
+        tag = _util.get_job_metadata_tag(selector, self._job_config, optional=True)
+
+        attributes = dict() if tag is None else dict(
+            (attr_name, _type_system.MetadataCodec.decode_value(attr_value))
+            for attr_name, attr_value in tag.attrs.items())
+
+        metadata = _api.RuntimeMetadata(objectId=item_id, attributes=attributes)
+
+        return _util.attach_runtime_metadata(obj, metadata)
+
     def _join_sections(self, *sections: GraphSection, allow_partial_inputs: bool = False):
 
         n_sections = len(sections)
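_attach_metadata feeds resolved object IDs and decoded tag attributes into the new RuntimeMetadata API type (model_api.py gains 45 lines in this release). The sketch below only restates the construction shown above; how models access this metadata is not part of this diff, and the field access is an assumption:

# Assumed shape, based on the constructor call in _attach_metadata
metadata = _api.RuntimeMetadata(
    objectId=item_id,                       # TagHeader from the job's objectMapping
    attributes={"trac_dev_mode": True})     # decoded tag attributes as plain values

print(metadata.objectId.objectId, metadata.objectId.objectVersion)
print(metadata.attributes.get("trac_dev_mode"))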
tracdap/rt/_impl/grpc/codec.py
@@ -61,16 +61,6 @@ __METADATA_MAPPING = {
 }
 
 
-_T_MSG = tp.TypeVar('_T_MSG', bound=_message.Message)
-
-
-def encode_message(msg_class: _T_MSG.__class__, obj: tp.Any) -> _T_MSG:
-
-    attrs = dict((k, encode(v)) for k, v in obj.__dict__.items())
-
-    return msg_class(**attrs)
-
-
 def encode(obj: tp.Any) -> tp.Any:
 
     # Translate TRAC domain objects into generic dict / list structures
@@ -97,6 +87,9 @@ def encode(obj: tp.Any) -> tp.Any:
     if msg_class is None:
         raise ex.ETracInternal(f"No gRPC metadata mapping is available for type {type(obj).__name__}")
 
-    attrs = dict((k, encode(v)) for k, v in obj.__dict__.items() if v is not None)
+    if hasattr(obj, "__slots__"):
+        attrs = dict((k, encode(getattr(obj, k))) for k in obj.__slots__)
+    else:
+        attrs = dict((k, encode(v)) for k, v in obj.__dict__.items() if v is not None)
 
     return msg_class(**attrs)
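The codec change above lets encode() handle domain objects that declare __slots__ (and therefore have no __dict__). The same pattern, shown standalone and independent of the TRAC codec:

from dataclasses import dataclass

def object_attrs(obj):
    # Prefer __slots__ when declared, otherwise fall back to __dict__
    # and skip unset (None) values, mirroring the codec change above.
    if hasattr(obj, "__slots__"):
        return {name: getattr(obj, name) for name in obj.__slots__}
    return {name: value for name, value in obj.__dict__.items() if value is not None}

@dataclass
class WithDict:
    a: int = 1
    b: str = None

class WithSlots:
    __slots__ = ("a", "b")
    def __init__(self):
        self.a = 1
        self.b = "x"

print(object_attrs(WithDict()))   # {'a': 1}
print(object_attrs(WithSlots()))  # {'a': 1, 'b': 'x'}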