tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_impl/core/data.py +578 -33
- tracdap/rt/_impl/core/repos.py +7 -0
- tracdap/rt/_impl/core/storage.py +10 -3
- tracdap/rt/_impl/core/util.py +54 -11
- tracdap/rt/_impl/exec/dev_mode.py +122 -100
- tracdap/rt/_impl/exec/engine.py +178 -109
- tracdap/rt/_impl/exec/functions.py +218 -257
- tracdap/rt/_impl/exec/graph.py +140 -125
- tracdap/rt/_impl/exec/graph_builder.py +411 -449
- tracdap/rt/_impl/grpc/codec.py +4 -2
- tracdap/rt/_impl/grpc/server.py +7 -7
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +1 -1
- tracdap/rt/_impl/runtime.py +8 -0
- tracdap/rt/_plugins/repo_git.py +56 -11
- tracdap/rt/_version.py +1 -1
- tracdap/rt/config/__init__.py +6 -6
- tracdap/rt/config/common.py +5 -0
- tracdap/rt/config/job.py +13 -3
- tracdap/rt/config/result.py +8 -4
- tracdap/rt/config/runtime.py +2 -0
- tracdap/rt/metadata/__init__.py +37 -36
- tracdap/rt/metadata/job.py +2 -0
- tracdap/rt/metadata/storage.py +9 -0
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/METADATA +3 -1
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/RECORD +47 -47
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/licenses/LICENSE +0 -0
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/top_level.txt +0 -0
tracdap/rt/_impl/core/repos.py
CHANGED
@@ -38,6 +38,13 @@ class RepositoryManager:

             try:

+                # Add global properties related to the repo protocol
+                related_props = {
+                    k: v for (k, v) in sys_config.properties.items()
+                    if k.startswith(f"{repo_config.protocol}.")}
+
+                repo_config.properties.update(related_props)
+
                 self._repos[repo_name] = plugins.PluginManager.load_plugin(IModelRepository, repo_config)

             except ex.EPluginNotAvailable as e:

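Note: this change, and the matching change in storage.py below, merge protocol-scoped global properties into a plugin's own configuration before the plugin is loaded. The following is a minimal sketch of the filtering rule, using plain dicts in place of the real tracdap config objects; the property names and values are invented for illustration.

# Protocol-prefix property merge, as introduced in repos.py and storage.py
sys_properties = {
    "git.connection.timeout": "30",   # matches a repo with protocol "git"
    "git.native.auth": "true",        # matches as well
    "s3.region": "eu-west-2",         # different protocol, filtered out
}

protocol = "git"

related_props = {
    k: v for (k, v) in sys_properties.items()
    if k.startswith(f"{protocol}.")}

plugin_properties = {"git.connection.timeout": "10"}
plugin_properties.update(related_props)

# Global properties overwrite plugin-level values on key collisions,
# because update() applies the global values last
print(plugin_properties)
# {'git.connection.timeout': '30', 'git.native.auth': 'true'}
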
tracdap/rt/_impl/core/storage.py
CHANGED
@@ -81,7 +81,7 @@ class StorageManager:
         self.__file_storage: tp.Dict[str, IFileStorage] = dict()
         self.__data_storage: tp.Dict[str, IDataStorage] = dict()
         self.__external: tp.List[str] = list()
-        self.
+        self.__sys_config = sys_config

         for storage_key, storage_config in sys_config.storage.buckets.items():
             self.create_storage(storage_key, storage_config)
@@ -93,13 +93,20 @@ class StorageManager:
             self.create_storage(storage_key, storage_config)

     def default_storage_key(self):
-        return self.
+        return self.__sys_config.storage.defaultBucket

     def default_storage_format(self):
-        return self.
+        return self.__sys_config.storage.defaultFormat

     def create_storage(self, storage_key: str, storage_config: _cfg.PluginConfig):

+        # Add global properties related to the storage protocol
+        related_props = {
+            k: v for (k, v) in self.__sys_config.properties.items()
+            if k.startswith(f"{storage_config.protocol}.")}
+
+        storage_config.properties.update(related_props)
+
         if plugins.PluginManager.is_plugin_available(IStorageProvider, storage_config.protocol):
             self._create_storage_from_provider(storage_key, storage_config)
         else:

tracdap/rt/_impl/core/util.py
CHANGED
@@ -16,6 +16,7 @@
 import datetime as dt
 import pathlib
 import platform
+import re

 import typing as tp
 import uuid
@@ -30,6 +31,7 @@ import traceback as tb
 __IS_WINDOWS = platform.system() == "Windows"
 __FIRST_MODEL_FRAME_NAME = "run_model"
 __FIRST_MODEL_FRAME_TEST_NAME = "_callTestMethod"
+__OBJ_KEY_PATTERN = re.compile(r"([A-Z]+)-(.*)-v(\d+)")


 def is_windows():
@@ -60,7 +62,7 @@ def format_file_size(size: int) -> str:

 def new_object_id(object_type: meta.ObjectType) -> meta.TagHeader:

-    timestamp = dt.datetime.
+    timestamp = dt.datetime.now(dt.timezone.utc)

     return meta.TagHeader(
         objectType=object_type,
@@ -71,6 +73,19 @@ def new_object_id(object_type: meta.ObjectType) -> meta.TagHeader:
         tagTimestamp=meta.DatetimeValue(timestamp.isoformat()))


+def new_object_version(prior_id: meta.TagHeader) -> meta.TagHeader:
+
+    timestamp = dt.datetime.now(dt.timezone.utc)
+
+    return meta.TagHeader(
+        objectType=prior_id.objectType,
+        objectId=prior_id.objectId,
+        objectVersion=prior_id.objectVersion + 1,
+        objectTimestamp=meta.DatetimeValue(timestamp.isoformat()),
+        tagVersion=1,
+        tagTimestamp=meta.DatetimeValue(timestamp.isoformat()))
+
+
 def object_key(object_id: tp.Union[meta.TagHeader, meta.TagSelector]) -> str:

     if isinstance(object_id, meta.TagHeader):
@@ -106,29 +121,57 @@ def selector_for_latest(object_id: meta.TagHeader) -> meta.TagSelector:
         latestTag=True)


-def
+def get_job_metadata(
         selector: tp.Union[meta.TagHeader, meta.TagSelector],
         job_config: cfg.JobConfig,
-        optional: bool = False)
+        optional: bool = False) \
+        -> tp.Optional[meta.ObjectDefinition]:

-
-
+    obj_key = object_key(selector)
+    obj_id = job_config.objectMapping.get(obj_key)

-    if
-
+    if obj_id is not None:
+        obj_key = object_key(obj_id)

-
+    obj = job_config.objects.get(obj_key)

-    if
-        return
+    if obj is not None:
+        return obj

     if optional:
         return None

-    err = f"Missing required {selector.objectType.name}
+    err = f"Missing required {selector.objectType.name} object for [{object_key(selector)}]"
     raise ex.ERuntimeValidation(err)


+def get_job_mapping(
+        selector: tp.Union[meta.TagHeader, meta.TagSelector],
+        job_config: cfg.JobConfig) \
+        -> meta.TagHeader:
+
+    obj_key = object_key(selector)
+    obj_id = job_config.objectMapping.get(obj_key)
+
+    if obj_id is not None:
+        return obj_id
+
+    obj_key_match = __OBJ_KEY_PATTERN.match(obj_key)
+
+    if not obj_key_match:
+        err = f"Missing required {selector.objectType.name} ID for [{object_key(selector)}]"
+        raise ex.ERuntimeValidation(err)
+
+    obj_type = obj_key_match.group(1)
+    obj_id = obj_key_match.group(2)
+    obj_ver = obj_key_match.group(3)
+    obj_ts = job_config.jobId.objectTimestamp
+
+    return meta.TagHeader(
+        meta.ObjectType.__members__[obj_type], obj_id,
+        int(obj_ver), obj_ts, 1, obj_ts)
+
+
 def get_origin(metaclass: type):

     # Minimum supported Python is 3.7, which does not provide get_origin and get_args

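util.py gains two building blocks for the new result handling: new_object_version, which mints the next object version from a prior TagHeader, and get_job_mapping, which can recover an ID by parsing a bare object key when no explicit mapping exists. Below is a minimal sketch of that key-parsing step, reusing the __OBJ_KEY_PATTERN regex from the diff; the example key is invented.

import re

OBJ_KEY_PATTERN = re.compile(r"([A-Z]+)-(.*)-v(\d+)")

obj_key = "MODEL-0a1b2c3d-4e5f-6789-abcd-ef0123456789-v3"
match = OBJ_KEY_PATTERN.match(obj_key)

if match:
    obj_type = match.group(1)       # "MODEL"
    obj_id = match.group(2)         # the object ID (a UUID)
    obj_ver = int(match.group(3))   # 3
    print(obj_type, obj_id, obj_ver)
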
tracdap/rt/_impl/exec/dev_mode.py
CHANGED
@@ -87,6 +87,7 @@ class DevModeTranslator:
             config_mgr: _cfg_p.ConfigManager):

         storage_config = copy.deepcopy(sys_config.storage)
+        storage_config.defaultLayout = _meta.StorageLayout.DEVELOPER_LAYOUT

         for bucket_key, bucket_config in storage_config.buckets.items():
             storage_config.buckets[bucket_key] = cls._resolve_storage_location(
@@ -218,27 +219,29 @@ class DevModeTranslator:
         return job_config, job_def

     @classmethod
-    def
+    def _add_job_metadata(
             cls, job_config: _cfg.JobConfig,
             obj_id: _meta.TagHeader, obj: _meta.ObjectDefinition) \
             -> _cfg.JobConfig:

         obj_key = _util.object_key(obj_id)
-        job_config.
+        job_config.objects[obj_key] = obj

         return job_config

     @classmethod
-    def _process_job_id(cls, job_config: _cfg.JobConfig):
+    def _process_job_id(cls, job_config: _cfg.JobConfig) -> _cfg.JobConfig:

         job_id = _util.new_object_id(_meta.ObjectType.JOB)
+        result_id = _util.new_object_id(_meta.ObjectType.RESULT)

         cls._log.info(f"Assigning job ID = [{_util.object_key(job_id)}]")
+        cls._log.info(f"Assigning result ID = [{_util.object_key(result_id)}]")

-
-
+        job_config.jobId = job_id
+        job_config.resultId = result_id

-        return
+        return job_config

     @classmethod
     def _process_job_type(cls, job_def: _meta.JobDefinition):
@@ -346,7 +349,7 @@ class DevModeTranslator:

             model_id, model_obj = self._generate_model_for_class(model_class)
             job_detail.model = _util.selector_for(model_id)
-            job_config = self.
+            job_config = self._add_job_metadata(job_config, model_id, model_obj)

         # Otherwise look for models specified as a single string, and take that as the entry point
         else:
@@ -355,7 +358,7 @@ class DevModeTranslator:
             if hasattr(job_detail, "model") and isinstance(job_detail.model, str):
                 model_id, model_obj = self._generate_model_for_entry_point(job_detail.model)  # noqa
                 job_detail.model = _util.selector_for(model_id)
-                job_config = self.
+                job_config = self._add_job_metadata(job_config, model_id, model_obj)

             elif hasattr(job_detail, "model") and isinstance(job_detail.model, _meta.TagSelector):
                 if job_detail.model.objectType == _meta.ObjectType.OBJECT_TYPE_NOT_SET:
@@ -369,7 +372,7 @@ class DevModeTranslator:
                 if isinstance(model_detail, str):
                     model_id, model_obj = self._generate_model_for_entry_point(model_detail)
                     job_detail.models[model_key] = _util.selector_for(model_id)
-                    job_config = self.
+                    job_config = self._add_job_metadata(job_config, model_id, model_obj)

         return job_config, job_def

@@ -446,8 +449,8 @@ class DevModeTranslator:
         job_def.runFlow.flow = _util.selector_for(flow_id)

         job_config = copy.copy(job_config)
-        job_config.
-        job_config = self.
+        job_config.objects = copy.copy(job_config.objects)
+        job_config = self._add_job_metadata(job_config, flow_id, flow_obj)

         return job_config, job_def

@@ -472,7 +475,7 @@ class DevModeTranslator:
         for model_name, model_node in model_nodes.items():

             model_selector = job_def.runFlow.models[model_name]
-            model_obj = _util.
+            model_obj = _util.get_job_metadata(model_selector, job_config)

             model_inputs = set(model_obj.model.inputs.keys())
             model_outputs = set(model_obj.model.outputs.keys())
@@ -540,7 +543,7 @@ class DevModeTranslator:
             # Generate node param sockets needed by the model
             if node_name in job.models:
                 model_selector = job.models[node_name]
-                model_obj = _util.
+                model_obj = _util.get_job_metadata(model_selector, job_config)
                 for param_name in model_obj.model.parameters:
                     add_param_to_flow(node_name, param_name)
                     if param_name not in node.parameters:
@@ -622,7 +625,7 @@ class DevModeTranslator:
         for target in targets:

             model_selector = job_def.runFlow.models.get(target.node)
-            model_obj = _util.
+            model_obj = _util.get_job_metadata(model_selector, job_config)
             model_param = model_obj.model.parameters.get(target.socket)
             model_params.append(model_param)

@@ -659,7 +662,7 @@ class DevModeTranslator:
         for target in targets:

             model_selector = job_def.runFlow.models.get(target.node)
-            model_obj = _util.
+            model_obj = _util.get_job_metadata(model_selector, job_config)
             model_input = model_obj.model.inputs.get(target.socket)
             model_inputs.append(model_input)

@@ -694,7 +697,7 @@ class DevModeTranslator:
         for source in sources:

             model_selector = job_def.runFlow.models.get(source.node)
-            model_obj = _util.
+            model_obj = _util.get_job_metadata(model_selector, job_config)
             model_input = model_obj.model.outputs.get(source.socket)
             model_outputs.append(model_input)

@@ -727,10 +730,10 @@ class DevModeTranslator:

         if hasattr(job_detail, "model"):
             model_key = _util.object_key(job_detail.model)
-            model_or_flow = job_config.
+            model_or_flow = job_config.objects[model_key].model
         elif hasattr(job_detail, "flow"):
             flow_key = _util.object_key(job_detail.flow)
-            model_or_flow = job_config.
+            model_or_flow = job_config.objects[flow_key].flow
         else:
             model_or_flow = None

@@ -784,71 +787,68 @@ class DevModeTranslator:
         job_detail = self._get_job_detail(job_def)

         if hasattr(job_detail, "model"):
-            model_obj = _util.
+            model_obj = _util.get_job_metadata(job_detail.model, job_config)
             required_inputs = model_obj.model.inputs
-
+            expected_outputs = model_obj.model.outputs

         elif hasattr(job_detail, "flow"):
-            flow_obj = _util.
+            flow_obj = _util.get_job_metadata(job_detail.flow, job_config)
             required_inputs = flow_obj.flow.inputs
-
+            expected_outputs = flow_obj.flow.outputs

         else:
             return job_config, job_def

+        job_metadata = job_config.objects
         job_inputs = job_detail.inputs
         job_outputs = job_detail.outputs
-
-
-        for
-            if not
-
-
+        job_prior_outputs = job_detail.priorOutputs
+
+        for key, schema in required_inputs.items():
+            if key not in job_inputs:
+                if not schema.optional:
+                    raise _ex.EJobValidation(f"Missing required input [{key}]")
+                continue
+            supplied_input = job_inputs.pop(key) if key in job_inputs else None
+            input_selector = self._process_socket(key, schema, supplied_input, job_metadata, is_output=False)
+            if input_selector is not None:
+                job_inputs[key] = input_selector
+
+        for key, schema in expected_outputs.items():
+            if key not in job_outputs:
+                raise _ex.EJobValidation(f"Missing required output [{key}]")
+            supplied_output = job_outputs.pop(key)
+            output_selector = self._process_socket(key, schema, supplied_output, job_metadata, is_output=True)
+            if output_selector is not None:
+                job_prior_outputs[key] = output_selector

-
-            schema = model_input.schema if model_input and not model_input.dynamic else None
-            input_id = self._process_data_socket(input_key, input_value, schema, job_resources, new_unique_file=False)
-        elif model_input.objectType == _meta.ObjectType.FILE:
-            file_type = model_input.fileType
-            input_id = self._process_file_socket(input_key, input_value, file_type, job_resources, new_unique_file=False)
-        else:
-            raise _ex.EUnexpected()
-
-        job_inputs[input_key] = _util.selector_for(input_id)
-
-        for output_key, output_value in job_outputs.items():
-            if not (isinstance(output_value, str) and output_value in job_resources):
+        return job_config, job_def

-
+    def _process_socket(self, key, socket, supplied_value, job_metadata, is_output) -> _meta.TagSelector:

-
-
-
-        elif model_output.objectType == _meta.ObjectType.FILE:
-            file_type = model_output.fileType
-            output_id = self._process_file_socket(output_key, output_value, file_type, job_resources, new_unique_file=True)
-        else:
-            raise _ex.EUnexpected()
+        if socket.objectType == _meta.ObjectType.DATA:
+            schema = socket.schema if socket and not socket.dynamic else None
+            return self._process_data_socket(key, supplied_value, schema, job_metadata, is_output)

-
+        elif socket.objectType == _meta.ObjectType.FILE:
+            file_type = socket.fileType
+            return self._process_file_socket(key, supplied_value, file_type, job_metadata, is_output)

-
+        else:
+            raise _ex.EUnexpected()

     def _process_data_socket(
             self, data_key, data_value, schema: tp.Optional[_meta.SchemaDefinition],
-
-            -> _meta.
+            job_metadata: tp.Dict[str, _meta.ObjectDefinition], is_output: bool)\
+            -> _meta.TagSelector:

         data_id = _util.new_object_id(_meta.ObjectType.DATA)
         storage_id = _util.new_object_id(_meta.ObjectType.STORAGE)

-        self._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
-
         if isinstance(data_value, str):
             storage_path = data_value
             storage_key = self._sys_config.storage.defaultBucket
             storage_format = self.infer_format(storage_path, self._sys_config.storage, schema)
-            snap_version = 1

         elif isinstance(data_value, dict):

@@ -859,48 +859,55 @@ class DevModeTranslator:

             storage_key = data_value.get("storageKey") or self._sys_config.storage.defaultBucket
             storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config.storage, schema)
-            snap_version = 1

         else:
             raise _ex.EConfigParse(f"Invalid configuration for input '{data_key}'")

-        #
-
-
+        # Scan for existing versions using hte DEVELOPER storage layout
+
+        self._log.info(f"Looking for {'output' if is_output else 'input'} [{data_key}]...")
+
+        storage_path, version = self._find_latest_version(storage_key, storage_path)
+        data_id.objectVersion = version

-        if
-
+        if version > 0:
+            self._log.info(f"Found {'output' if is_output else 'input'} [{data_key}] version {version}")
+            self._log.info(f"Generating {'prior' if is_output else 'data'} definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
+        elif is_output:
+            self._log.info(f"No prior data for output [{data_key}]")
+        else:
+            # This is allowed for some scenarios, e.g. inside a job group
+            self._log.warning(f"No data found for input [{data_key}]")

         part_key = _meta.PartKey(opaqueKey="part-root", partType=_meta.PartType.PART_ROOT)
-
-
+        snap_index = version - 1 if version > 0 else 0
+        delta_index = 0
+        incarnation_index = 0

         # This is also defined in functions.DynamicDataSpecFunc, maybe centralize?
-        data_item = f"data/table/{data_id.objectId}/{part_key.opaqueKey}/snap-{
+        data_item = f"data/table/{data_id.objectId}/{part_key.opaqueKey}/snap-{snap_index}/delta-{delta_index}"

         data_obj = self._generate_data_definition(
-            part_key,
+            part_key, snap_index, delta_index, data_item,
             schema, storage_id)

         storage_obj = self._generate_storage_definition(
             storage_id, storage_key, storage_path, storage_format,
             data_item, incarnation_index)

-
-
+        job_metadata[_util.object_key(data_id)] = data_obj
+        job_metadata[_util.object_key(storage_id)] = storage_obj

-        return data_id
+        return _util.selector_for(data_id)

     def _process_file_socket(
             self, file_key, file_value, file_type: _meta.FileType,
-
-            -> _meta.
+            job_metadata: tp.Dict[str, _meta.ObjectDefinition], is_output: bool) \
+            -> tp.Optional[_meta.TagSelector]:

         file_id = _util.new_object_id(_meta.ObjectType.FILE)
         storage_id = _util.new_object_id(_meta.ObjectType.STORAGE)

-        self._log.info(f"Generating file definition for [{file_key}] with ID = [{_util.object_key(file_id)}]")
-
         if isinstance(file_value, str):

             storage_key = self._sys_config.storage.defaultBucket
@@ -917,17 +924,28 @@ class DevModeTranslator:
         else:
             raise _ex.EConfigParse(f"Invalid configuration for input '{file_key}'")

-
-        file_version = 1
+        # Scan for existing versions using hte DEVELOPER storage layout

-        if
-
-
+        self._log.info(f"Looking for {'output' if is_output else 'input'} [{file_key}]...")
+
+        storage_path, version = self._find_latest_version(storage_key, storage_path)
+        file_id.objectVersion = version
+
+        if version > 0:
+            self._log.info(f"Found {'output' if is_output else 'input'} [{file_key}] version {version}")
+            self._log.info(f"Generating {'prior' if is_output else 'file'} definition for [{file_key}] with ID = [{_util.object_key(file_id)}]")
+        elif is_output:
+            self._log.info(f"No prior data for output [{file_key}]")
         else:
-
-
+            # This is allowed for some scenarios, e.g. inside a job group
+            self._log.warning(f"No data found for input [{file_key}]")
+
+        storage = self._storage_manager.get_file_storage(storage_key)
+        file_size = storage.size(storage_path) if storage.exists(storage_path) else 0

-
+        storage_format = "application/x-binary"
+
+        data_item = f"file/{file_id.objectId}/version-{version}"
         file_name = f"{file_key}.{file_type.extension}"

         file_obj = self._generate_file_definition(
@@ -936,12 +954,12 @@ class DevModeTranslator:

         storage_obj = self._generate_storage_definition(
             storage_id, storage_key, storage_path, storage_format,
-            data_item, incarnation_index=
+            data_item, incarnation_index=0)

-
-
+        job_metadata[_util.object_key(file_id)] = file_obj
+        job_metadata[_util.object_key(storage_id)] = storage_obj

-        return file_id
+        return _util.selector_for(file_id)

     @staticmethod
     def infer_format(storage_path: str, storage_config: _cfg.StorageConfig, schema: tp.Optional[_meta.SchemaDefinition]):
@@ -960,25 +978,28 @@ class DevModeTranslator:
         else:
             return storage_config.defaultFormat

-    def
+    def _find_latest_version(self, storage_key, storage_path):

-
-
-
+        storage = self._storage_manager.get_file_storage(storage_key)
+        orig_path = pathlib.PurePath(storage_path)
+        version = 0

-        if
-
-
-
-
+        if not storage.exists(str(orig_path.parent)):
+            return storage_path, version
+
+        listing = storage.ls(str(orig_path.parent))
+        existing_files = list(map(lambda stat: stat.file_name, listing))
+
+        next_version = version + 1
+        next_name = f"{orig_path.stem}{orig_path.suffix}"

-        while
+        while next_name in existing_files:

-
-
-            storage_path = str(x_orig_path.parent.joinpath(x_name))
+            storage_path = str(orig_path.parent.joinpath(next_name))
+            version = next_version

-
+            next_version = version + 1
+            next_name = f"{orig_path.stem}-{next_version}{orig_path.suffix}"

         return storage_path, version

@@ -1043,6 +1064,7 @@ class DevModeTranslator:

         storage_def = _meta.StorageDefinition()
         storage_def.dataItems[data_item] = storage_item
+        storage_def.layout = _meta.StorageLayout.DEVELOPER_LAYOUT

         if storage_format.lower() == "csv":
             storage_def.storageOptions["lenient_csv_parser"] = _types.MetadataCodec.encode_value(True)
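
The _find_latest_version scan above implements the DEVELOPER storage layout naming scheme: the base file name is version 1, and later versions insert -2, -3, and so on before the extension. Below is a self-contained sketch of that loop, with the storage listing replaced by a plain list; the file names are invented.

import pathlib

def find_latest_version(existing_files, storage_path):
    # Mirror of the loop in _find_latest_version, minus the storage calls:
    # probe "name.ext", then "name-2.ext", "name-3.ext", ... until a gap
    orig_path = pathlib.PurePath(storage_path)
    version = 0
    next_version = version + 1
    next_name = f"{orig_path.stem}{orig_path.suffix}"
    while next_name in existing_files:
        storage_path = str(orig_path.parent.joinpath(next_name))
        version = next_version
        next_version = version + 1
        next_name = f"{orig_path.stem}-{next_version}{orig_path.suffix}"
    return storage_path, version

files = ["customer_data.csv", "customer_data-2.csv"]
print(find_latest_version(files, "inputs/customer_data.csv"))
# -> ('inputs/customer_data-2.csv', 2)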
|