tracdap-runtime 0.7.0__py3-none-any.whl → 0.8.0b1__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- tracdap/rt/_exec/context.py +140 -64
- tracdap/rt/_exec/dev_mode.py +144 -69
- tracdap/rt/_exec/engine.py +9 -7
- tracdap/rt/_exec/functions.py +95 -33
- tracdap/rt/_exec/graph.py +22 -15
- tracdap/rt/_exec/graph_builder.py +221 -98
- tracdap/rt/_exec/runtime.py +19 -6
- tracdap/rt/_impl/data.py +86 -13
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +3 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.pyi +8 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +27 -25
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -4
- tracdap/rt/_impl/models.py +9 -7
- tracdap/rt/_impl/static_api.py +53 -33
- tracdap/rt/_impl/util.py +1 -1
- tracdap/rt/_impl/validation.py +54 -28
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/__init__.py +6 -3
- tracdap/rt/api/file_types.py +29 -0
- tracdap/rt/api/hook.py +15 -7
- tracdap/rt/api/model_api.py +16 -0
- tracdap/rt/api/static_api.py +211 -125
- tracdap/rt/config/__init__.py +6 -6
- tracdap/rt/config/common.py +11 -1
- tracdap/rt/config/platform.py +4 -6
- tracdap/rt/launch/launch.py +9 -11
- tracdap/rt/metadata/__init__.py +10 -9
- tracdap/rt/metadata/file.py +8 -0
- tracdap/rt/metadata/model.py +12 -2
- {tracdap_runtime-0.7.0.dist-info → tracdap_runtime-0.8.0b1.dist-info}/METADATA +15 -15
- {tracdap_runtime-0.7.0.dist-info → tracdap_runtime-0.8.0b1.dist-info}/RECORD +34 -33
- {tracdap_runtime-0.7.0.dist-info → tracdap_runtime-0.8.0b1.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.7.0.dist-info → tracdap_runtime-0.8.0b1.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.7.0.dist-info → tracdap_runtime-0.8.0b1.dist-info}/top_level.txt +0 -0
tracdap/rt/_exec/dev_mode.py
CHANGED
@@ -137,11 +137,14 @@ class DevModeTranslator:
         raise _ex.EConfigParse(msg)
 
 
-    def __init__(
+    def __init__(
+            self, sys_config: _cfg.RuntimeConfig, config_mgr: _cfg_p.ConfigManager, scratch_dir: pathlib.Path = None,
+            model_loader: _models.ModelLoader = None, storage_manager: _storage.StorageManager = None):
+
         self._sys_config = sys_config
         self._config_mgr = config_mgr
-        self.
-        self.
+        self._model_loader = model_loader or _models.ModelLoader(self._sys_config, scratch_dir)
+        self._storage_manager = storage_manager or _storage.StorageManager(self._sys_config)
 
     def translate_job_config(
             self, job_config: _cfg.JobConfig,
@@ -150,8 +153,6 @@ class DevModeTranslator:
 
         try:
             self._log.info(f"Applying dev mode config translation to job config")
-
-            self._model_loader = _models.ModelLoader(self._sys_config, self._scratch_dir)
             self._model_loader.create_scope("DEV_MODE_TRANSLATION")
 
             job_config = copy.deepcopy(job_config)
@@ -168,7 +169,6 @@ class DevModeTranslator:
 
         finally:
             self._model_loader.destroy_scope("DEV_MODE_TRANSLATION")
-            self._model_loader = None
 
     def translate_job_def(
             self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition,
@@ -694,7 +694,7 @@ class DevModeTranslator:
 
             model_selector = job_def.runFlow.models.get(source.node)
             model_obj = _util.get_job_resource(model_selector, job_config)
-            model_input = model_obj.model.
+            model_input = model_obj.model.outputs.get(source.socket)
             model_outputs.append(model_input)
 
         if len(model_outputs) == 0:
@@ -764,7 +764,7 @@ class DevModeTranslator:
 
             else:
                 p_spec = param_specs[p_name]
-                cls._log.info(f"Encoding parameter [{p_name}] as {p_spec.paramType.basicType}")
+                cls._log.info(f"Encoding parameter [{p_name}] as {p_spec.paramType.basicType.name}")
 
                 encoded_value = _types.MetadataCodec.convert_value(p_value, p_spec.paramType)
                 encoded_values[p_name] = encoded_value
@@ -798,38 +798,46 @@ class DevModeTranslator:
             if not (isinstance(input_value, str) and input_value in job_resources):
 
                 model_input = required_inputs[input_key]
-                input_schema = model_input.schema if model_input and not model_input.dynamic else None
 
-
-
-
+                if model_input.objectType == _meta.ObjectType.DATA:
+                    schema = model_input.schema if model_input and not model_input.dynamic else None
+                    input_id = self._process_data_socket(input_key, input_value, schema, job_resources, new_unique_file=False)
+                elif model_input.objectType == _meta.ObjectType.FILE:
+                    file_type = model_input.fileType
+                    input_id = self._process_file_socket(input_key, input_value, file_type, job_resources, new_unique_file=False)
+                else:
+                    raise _ex.EUnexpected()
 
                 job_inputs[input_key] = _util.selector_for(input_id)
 
         for output_key, output_value in job_outputs.items():
            if not (isinstance(output_value, str) and output_value in job_resources):
 
-                model_output= required_outputs[output_key]
-                output_schema = model_output.schema if model_output and not model_output.dynamic else None
+                model_output = required_outputs[output_key]
 
-
-
-
+                if model_output.objectType == _meta.ObjectType.DATA:
+                    schema = model_output.schema if model_output and not model_output.dynamic else None
+                    output_id = self._process_data_socket(output_key, output_value, schema, job_resources, new_unique_file=True)
+                elif model_output.objectType == _meta.ObjectType.FILE:
+                    file_type = model_output.fileType
+                    output_id = self._process_file_socket(output_key, output_value, file_type, job_resources, new_unique_file=True)
+                else:
+                    raise _ex.EUnexpected()
 
                 job_outputs[output_key] = _util.selector_for(output_id)
 
         return job_config, job_def
 
-    def
-            self, data_key, data_value,
-            resources: tp.Dict[str, _meta.ObjectDefinition],
-            new_unique_file=False,
-            schema: tp.Optional[_meta.SchemaDefinition] = None) \
+    def _process_data_socket(
+            self, data_key, data_value, schema: tp.Optional[_meta.SchemaDefinition],
+            resources: tp.Dict[str, _meta.ObjectDefinition], new_unique_file=False) \
            -> _meta.TagHeader:
 
         data_id = _util.new_object_id(_meta.ObjectType.DATA)
         storage_id = _util.new_object_id(_meta.ObjectType.STORAGE)
 
+        self._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
+
         if isinstance(data_value, str):
             storage_path = data_value
             storage_key = self._sys_config.storage.defaultBucket
@@ -850,43 +858,85 @@ class DevModeTranslator:
         else:
             raise _ex.EConfigParse(f"Invalid configuration for input '{data_key}'")
 
-        self._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
-
         # For unique outputs, increment the snap number to find a new unique snap
         # These are not incarnations, bc likely in dev mode model code and inputs are changing
         # Incarnations are for recreation of a dataset using the exact same code path and inputs
 
         if new_unique_file:
+            storage_path, snap_version = self._new_unique_file(data_key, storage_key, storage_path, snap_version)
 
-
-
-
-            x_name = x_orig_path.name
-
-            if x_storage.exists(str(x_orig_path.parent)):
-                listing = x_storage.ls(str(x_orig_path.parent))
-                existing_files = list(map(lambda stat: stat.file_name, listing))
-            else:
-                existing_files = []
-
-            while x_name in existing_files:
+        part_key = _meta.PartKey(opaqueKey="part-root", partType=_meta.PartType.PART_ROOT)
+        delta_index = 1
+        incarnation_index = 1
 
-
-
-                storage_path = str(x_orig_path.parent.joinpath(x_name))
+        # This is also defined in functions.DynamicDataSpecFunc, maybe centralize?
+        data_item = f"data/table/{data_id.objectId}/{part_key.opaqueKey}/snap-{snap_version}/delta-{delta_index}"
 
-
+        data_obj = self._generate_data_definition(
+            part_key, snap_version, delta_index, data_item,
+            schema, storage_id)
 
-
-
-
-            schema=schema)
+        storage_obj = self._generate_storage_definition(
+            storage_id, storage_key, storage_path, storage_format,
+            data_item, incarnation_index)
 
         resources[_util.object_key(data_id)] = data_obj
         resources[_util.object_key(storage_id)] = storage_obj
 
         return data_id
 
+    def _process_file_socket(
+            self, file_key, file_value, file_type: _meta.FileType,
+            resources: tp.Dict[str, _meta.ObjectDefinition], new_unique_file=False) \
+            -> _meta.TagHeader:
+
+        file_id = _util.new_object_id(_meta.ObjectType.FILE)
+        storage_id = _util.new_object_id(_meta.ObjectType.STORAGE)
+
+        self._log.info(f"Generating file definition for [{file_key}] with ID = [{_util.object_key(file_id)}]")
+
+        if isinstance(file_value, str):
+
+            storage_key = self._sys_config.storage.defaultBucket
+            storage_path = file_value
+
+        elif isinstance(file_value, dict):
+
+            storage_key = file_value.get("storageKey") or self._sys_config.storage.defaultBucket
+            storage_path = file_value.get("path")
+
+            if not storage_path:
+                raise _ex.EConfigParse(f"Invalid configuration for input [{file_key}] (missing required value 'path'")
+
+        else:
+            raise _ex.EConfigParse(f"Invalid configuration for input '{file_key}'")
+
+        storage_format = "application/x-binary"
+        file_version = 1
+
+        if new_unique_file:
+            storage_path, file_version = self._new_unique_file(file_key, storage_key, storage_path, file_version)
+            file_size = 0
+        else:
+            storage = self._storage_manager.get_file_storage(storage_key)
+            file_size = storage.size(storage_path)
+
+        data_item = f"file/{file_id.objectId}/version-{file_version}"
+        file_name = f"{file_key}.{file_type.extension}"
+
+        file_obj = self._generate_file_definition(
+            file_name, file_type, file_size,
+            storage_id, data_item)
+
+        storage_obj = self._generate_storage_definition(
+            storage_id, storage_key, storage_path, storage_format,
+            data_item, incarnation_index=1)
+
+        resources[_util.object_key(file_id)] = file_obj
+        resources[_util.object_key(storage_id)] = storage_obj
+
+        return file_id
+
     @staticmethod
     def infer_format(storage_path: str, storage_config: _cfg.StorageConfig):
 
@@ -898,20 +948,33 @@ class DevModeTranslator:
         else:
             return storage_config.defaultFormat
 
-
-    def _generate_input_definition(
-            cls, data_id: _meta.TagHeader, storage_id: _meta.TagHeader,
-            storage_key: str, storage_path: str, storage_format: str,
-            snap_index: int, delta_index: int, incarnation_index: int,
-            schema: tp.Optional[_meta.SchemaDefinition] = None) \
-            -> (_meta.ObjectDefinition, _meta.ObjectDefinition):
+    def _new_unique_file(self, socket_name, storage_key, storage_path, version):
 
-
-
-
+        x_storage = self._storage_manager.get_file_storage(storage_key)
+        x_orig_path = pathlib.PurePath(storage_path)
+        x_name = x_orig_path.name
 
-
-
+        if x_storage.exists(str(x_orig_path.parent)):
+            listing = x_storage.ls(str(x_orig_path.parent))
+            existing_files = list(map(lambda stat: stat.file_name, listing))
+        else:
+            existing_files = []
+
+        while x_name in existing_files:
+
+            version += 1
+            x_name = f"{x_orig_path.stem}-{version}{x_orig_path.suffix}"
+            storage_path = str(x_orig_path.parent.joinpath(x_name))
+
+        self._log.info(f"Output for [{socket_name}] will be version {version}")
+
+        return storage_path, version
+
+    @classmethod
+    def _generate_data_definition(
+            cls, part_key: _meta.PartKey, snap_index: int, delta_index: int, data_item: str,
+            schema: tp.Optional[_meta.SchemaDefinition], storage_id: _meta.TagHeader) \
+            -> (_meta.ObjectDefinition, _meta.ObjectDefinition):
 
         delta = _meta.DataDefinition.Delta(
             deltaIndex=delta_index,
@@ -925,17 +988,31 @@ class DevModeTranslator:
             partKey=part_key,
             snap=snap)
 
-        data_def = _meta.DataDefinition(
+        data_def = _meta.DataDefinition()
         data_def.parts[part_key.opaqueKey] = part
+        data_def.schema = schema
+        data_def.storageId = _util.selector_for(storage_id)
 
-
-            data_def.schema = schema
-        else:
-            data_def.schema = None
+        return _meta.ObjectDefinition(objectType=_meta.ObjectType.DATA, data=data_def)
 
-
-
-
+    @classmethod
+    def _generate_file_definition(
+            cls, file_name: str, file_type: _meta.FileType, file_size: int,
+            storage_id: _meta.TagHeader, data_item: str) \
+            -> _meta.ObjectDefinition:
+
+        file_def = _meta.FileDefinition(
+            name=file_name, extension=file_type.extension, mimeType=file_type.mimeType,
+            storageId=_util.selector_for(storage_id), dataItem=data_item, size=file_size)
+
+        return _meta.ObjectDefinition(objectType=_meta.ObjectType.FILE, file=file_def)
+
+    @classmethod
+    def _generate_storage_definition(
+            cls, storage_id: _meta.TagHeader,
+            storage_key: str, storage_path: str, storage_format: str,
+            data_item: str, incarnation_index: int) \
+            -> _meta.ObjectDefinition:
 
         storage_copy = _meta.StorageCopy(
             storageKey=storage_key,
@@ -952,16 +1029,14 @@ class DevModeTranslator:
         storage_item = _meta.StorageItem(
             incarnations=[storage_incarnation])
 
-        storage_def = _meta.StorageDefinition(
-        storage_def.dataItems[
+        storage_def = _meta.StorageDefinition()
+        storage_def.dataItems[data_item] = storage_item
 
         if storage_format.lower() == "csv":
            storage_def.storageOptions["lenient_csv_parser"] = _types.MetadataCodec.encode_value(True)
 
-
-        storage_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
+        return _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
 
-        return data_obj, storage_obj
 
 
 DevModeTranslator._log = _util.logger_for_class(DevModeTranslator)
tracdap/rt/_exec/engine.py
CHANGED
@@ -170,7 +170,7 @@ class TracEngine(_actors.Actor):
 
         self._log.info(f"Job submitted: [{job_key}]")
 
-        job_processor = JobProcessor(self._models, self._storage, job_key, job_config, result_spec, graph_spec=None)
+        job_processor = JobProcessor(self._sys_config, self._models, self._storage, job_key, job_config, result_spec, graph_spec=None)
         job_actor_id = self.actors().spawn(job_processor)
 
         job_monitor_success = lambda ctx, key, result: self._notify_callback(key, result, None)
@@ -190,7 +190,7 @@ class TracEngine(_actors.Actor):
 
         child_key = _util.object_key(child_id)
 
-        child_processor = JobProcessor(self._models, self._storage, child_key, None, None, graph_spec=child_graph) # noqa
+        child_processor = JobProcessor(self._sys_config, self._models, self._storage, child_key, None, None, graph_spec=child_graph) # noqa
         child_actor_id = self.actors().spawn(child_processor)
 
         child_state = _JobState(child_id)
@@ -336,7 +336,8 @@ class JobProcessor(_actors.Actor):
     """
 
     def __init__(
-            self,
+            self, sys_config: _cfg.RuntimeConfig,
+            models: _models.ModelLoader, storage: _storage.StorageManager,
             job_key: str, job_config: _cfg.JobConfig, result_spec: _graph.JobResultSpec,
             graph_spec: tp.Optional[_graph.Graph]):
 
@@ -345,6 +346,7 @@ class JobProcessor(_actors.Actor):
         self.job_config = job_config
         self.result_spec = result_spec
         self.graph_spec = graph_spec
+        self._sys_config = sys_config
         self._models = models
         self._storage = storage
         self._resolver = _func.FunctionResolver(models, storage)
@@ -358,7 +360,7 @@ class JobProcessor(_actors.Actor):
         if self.graph_spec is not None:
             self.actors().send(self.actors().id, "build_graph_succeeded", self.graph_spec)
         else:
-            self.actors().spawn(GraphBuilder(self.job_config, self.result_spec))
+            self.actors().spawn(GraphBuilder(self._sys_config, self.job_config, self.result_spec))
 
     def on_stop(self):
 
@@ -426,8 +428,9 @@ class GraphBuilder(_actors.Actor):
     GraphBuilder is a worker (actor) to wrap the GraphBuilder logic from graph_builder.py
     """
 
-    def __init__(self, job_config: _cfg.JobConfig, result_spec: _graph.JobResultSpec):
+    def __init__(self, sys_config: _cfg.RuntimeConfig, job_config: _cfg.JobConfig, result_spec: _graph.JobResultSpec):
         super().__init__()
+        self.sys_config = sys_config
         self.job_config = job_config
         self.result_spec = result_spec
         self._log = _util.logger_for_object(self)
@@ -440,8 +443,7 @@ class GraphBuilder(_actors.Actor):
 
         self._log.info("Building execution graph")
 
-
-        graph_builder = _graph.GraphBuilder(job_config, self.result_spec)
+        graph_builder = _graph.GraphBuilder(self.sys_config, job_config, self.result_spec)
         graph_spec = graph_builder.build_job(job_config.job)
 
         self.actors().reply("build_graph_succeeded", graph_spec)
tracdap/rt/_exec/functions.py
CHANGED
@@ -15,6 +15,7 @@
 
 from __future__ import annotations
 
+import copy
 import datetime
 import abc
 import random
@@ -296,8 +297,13 @@ class DataViewFunc(NodeFunction[_data.DataView]):
 
         # Map empty item -> emtpy view (for optional inputs not supplied)
         if root_item.is_empty():
-            return _data.DataView.create_empty()
+            return _data.DataView.create_empty(root_item.object_type)
 
+        # Handle file data views
+        if root_item.object_type == meta.ObjectType.FILE:
+            return _data.DataView.for_file_item(root_item)
+
+        # Everything else is a regular data view
         if self.node.schema is not None and len(self.node.schema.table.fields) > 0:
             trac_schema = self.node.schema
         else:
@@ -322,7 +328,11 @@ class DataItemFunc(NodeFunction[_data.DataItem]):
 
         # Map empty view -> emtpy item (for optional outputs not supplied)
         if data_view.is_empty():
-            return _data.DataItem.create_empty()
+            return _data.DataItem.create_empty(data_view.object_type)
+
+        # Handle file data views
+        if data_view.object_type == meta.ObjectType.FILE:
+            return data_view.file_item
 
         # TODO: Support selecting data item described by self.node
 
@@ -342,25 +352,24 @@ class DataResultFunc(NodeFunction[ObjectBundle]):
 
     def _execute(self, ctx: NodeContext) -> ObjectBundle:
 
-
+        data_spec = _ctx_lookup(self.node.data_save_id, ctx)
 
-
-        if data_item.is_empty():
-            return {}
+        result_bundle = dict()
 
-
+        # Do not record output metadata for optional outputs that are empty
+        if data_spec.is_empty():
+            return result_bundle
 
-
-
+        if self.node.data_key is not None:
+            result_bundle[self.node.data_key] = meta.ObjectDefinition(objectType=meta.ObjectType.DATA, data=data_spec.data_def)
 
-
-
+        if self.node.file_key is not None:
+            result_bundle[self.node.file_key] = meta.ObjectDefinition(objectType=meta.ObjectType.FILE, file=data_spec.file_def)
 
-
-            self.node.
-            self.node.storage_key: storage_result}
+        if self.node.storage_key is not None:
+            result_bundle[self.node.storage_key] = meta.ObjectDefinition(objectType=meta.ObjectType.STORAGE, storage=data_spec.storage_def)
 
-        return
+        return result_bundle
 
 
 class DynamicDataSpecFunc(NodeFunction[_data.DataSpec]):
@@ -443,11 +452,7 @@ class DynamicDataSpecFunc(NodeFunction[_data.DataSpec]):
 
         # Dynamic data def will always use an embedded schema (this is no ID for an external schema)
 
-        return _data.DataSpec(
-            data_item,
-            data_def,
-            storage_def,
-            schema_def=None)
+        return _data.DataSpec.create_data_spec(data_item, data_def, storage_def, schema_def=None)
 
 
 class _LoadSaveDataFunc(abc.ABC):
@@ -455,6 +460,16 @@ class _LoadSaveDataFunc(abc.ABC):
     def __init__(self, storage: _storage.StorageManager):
         self.storage = storage
 
+    @classmethod
+    def _choose_data_spec(cls, spec_id, spec, ctx: NodeContext):
+
+        if spec_id is not None:
+            return _ctx_lookup(spec_id, ctx)
+        elif spec is not None:
+            return spec
+        else:
+            raise _ex.EUnexpected()
+
     def _choose_copy(self, data_item: str, storage_def: meta.StorageDefinition) -> meta.StorageCopy:
 
         # Metadata should be checked for consistency before a job is accepted
@@ -491,9 +506,19 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):
 
     def _execute(self, ctx: NodeContext) -> _data.DataItem:
 
-        data_spec =
+        data_spec = self._choose_data_spec(self.node.spec_id, self.node.spec, ctx)
         data_copy = self._choose_copy(data_spec.data_item, data_spec.storage_def)
-
+
+        if data_spec.object_type == _api.ObjectType.DATA:
+            return self._load_data(data_spec, data_copy)
+
+        elif data_spec.object_type == _api.ObjectType.FILE:
+            return self._load_file(data_copy)
+
+        else:
+            raise _ex.EUnexpected()
+
+    def _load_data(self, data_spec, data_copy):
 
         trac_schema = data_spec.schema_def if data_spec.schema_def else data_spec.data_def.schema
         arrow_schema = _data.DataMapping.trac_to_arrow_schema(trac_schema) if trac_schema else None
@@ -503,36 +528,52 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):
         for opt_key, opt_value in data_spec.storage_def.storageOptions.items():
             options[opt_key] = _types.MetadataCodec.decode_value(opt_value)
 
-
+        storage = self.storage.get_data_storage(data_copy.storageKey)
+        table = storage.read_table(
             data_copy.storagePath,
             data_copy.storageFormat,
             arrow_schema,
             storage_options=options)
 
-        return _data.DataItem(table.schema, table)
+        return _data.DataItem(_api.ObjectType.DATA, table.schema, table)
+
+    def _load_file(self, data_copy):
+
+        storage = self.storage.get_file_storage(data_copy.storageKey)
+        raw_bytes = storage.read_bytes(data_copy.storagePath)
 
+        return _data.DataItem(_api.ObjectType.FILE, raw_bytes=raw_bytes)
 
-
+
+class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
 
     def __init__(self, node: SaveDataNode, storage: _storage.StorageManager):
         super().__init__(storage)
         self.node = node
 
-    def _execute(self, ctx: NodeContext):
+    def _execute(self, ctx: NodeContext) -> _data.DataSpec:
 
         # Item to be saved should exist in the current context
         data_item = _ctx_lookup(self.node.data_item_id, ctx)
 
+        # Metadata already exists as data_spec but may not contain schema, row count, file size etc.
+        data_spec = self._choose_data_spec(self.node.spec_id, self.node.spec, ctx)
+        data_copy = self._choose_copy(data_spec.data_item, data_spec.storage_def)
+
         # Do not save empty outputs (optional outputs that were not produced)
         if data_item.is_empty():
-            return
+            return _data.DataSpec.create_empty_spec(data_item.object_type)
 
-
-
+        if data_item.object_type == _api.ObjectType.DATA:
+            return self._save_data(data_item, data_spec, data_copy)
 
-
-
-
+        elif data_item.object_type == _api.ObjectType.FILE:
+            return self._save_file(data_item, data_spec, data_copy)
+
+        else:
+            raise _ex.EUnexpected()
+
+    def _save_data(self, data_item, data_spec, data_copy):
 
         # Current implementation will always put an Arrow table in the data item
         # Empty tables are allowed, so explicitly check if table is None
@@ -546,11 +587,32 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[None]):
         for opt_key, opt_value in data_spec.storage_def.storageOptions.items():
             options[opt_key] = _types.MetadataCodec.decode_value(opt_value)
 
-
+        storage = self.storage.get_data_storage(data_copy.storageKey)
+        storage.write_table(
             data_copy.storagePath, data_copy.storageFormat,
             data_item.table,
             storage_options=options, overwrite=False)
 
+        data_spec = copy.deepcopy(data_spec)
+        # TODO: Save row count in metadata
+
+        if data_spec.data_def.schema is None and data_spec.data_def.schemaId is None:
+            data_spec.data_def.schema = _data.DataMapping.arrow_to_trac_schema(data_item.table.schema)
+
+        return data_spec
+
+    def _save_file(self, data_item, data_spec, data_copy):
+
+        if data_item.raw_bytes is None:
+            raise _ex.EUnexpected()
+
+        storage = self.storage.get_file_storage(data_copy.storageKey)
+        storage.write_bytes(data_copy.storagePath, data_item.raw_bytes)
+
+        data_spec = copy.deepcopy(data_spec)
+        data_spec.file_def.size = len(data_item.raw_bytes)
+
+        return data_spec
 
 
 def _model_def_for_import(import_details: meta.ImportModelJob):