tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- tracdap/rt/_impl/core/config_parser.py +29 -3
- tracdap/rt/_impl/core/data.py +627 -40
- tracdap/rt/_impl/core/repos.py +17 -8
- tracdap/rt/_impl/core/storage.py +25 -13
- tracdap/rt/_impl/core/struct.py +254 -60
- tracdap/rt/_impl/core/util.py +125 -11
- tracdap/rt/_impl/exec/context.py +35 -8
- tracdap/rt/_impl/exec/dev_mode.py +169 -127
- tracdap/rt/_impl/exec/engine.py +203 -140
- tracdap/rt/_impl/exec/functions.py +228 -263
- tracdap/rt/_impl/exec/graph.py +141 -126
- tracdap/rt/_impl/exec/graph_builder.py +428 -449
- tracdap/rt/_impl/grpc/codec.py +8 -13
- tracdap/rt/_impl/grpc/server.py +7 -7
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +37 -35
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +23 -19
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
- tracdap/rt/_impl/runtime.py +3 -9
- tracdap/rt/_impl/static_api.py +5 -6
- tracdap/rt/_plugins/format_csv.py +2 -2
- tracdap/rt/_plugins/repo_git.py +56 -11
- tracdap/rt/_plugins/storage_aws.py +165 -150
- tracdap/rt/_plugins/storage_azure.py +17 -11
- tracdap/rt/_plugins/storage_gcp.py +35 -18
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/model_api.py +45 -0
- tracdap/rt/config/__init__.py +7 -9
- tracdap/rt/config/common.py +3 -14
- tracdap/rt/config/job.py +17 -3
- tracdap/rt/config/platform.py +9 -32
- tracdap/rt/config/result.py +8 -4
- tracdap/rt/config/runtime.py +5 -10
- tracdap/rt/config/tenant.py +28 -0
- tracdap/rt/launch/cli.py +0 -8
- tracdap/rt/launch/launch.py +1 -3
- tracdap/rt/metadata/__init__.py +35 -35
- tracdap/rt/metadata/data.py +19 -31
- tracdap/rt/metadata/job.py +3 -1
- tracdap/rt/metadata/storage.py +9 -0
- tracdap/rt/metadata/type.py +9 -5
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +5 -3
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +64 -63
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
--- tracdap/rt/_impl/exec/dev_mode.py (0.8.0rc2)
+++ tracdap/rt/_impl/exec/dev_mode.py (0.9.0b2)
@@ -58,11 +58,6 @@ class DevModeTranslator:
 
         cls._log.info(f"Applying dev mode config translation to system config")
 
-        # TODO: In code gen, default object types to a new object unless the field is marked as optional
-        # This would match the general semantics of protobuf
-        if sys_config.storage is None:
-            sys_config.storage = _cfg.StorageConfig()
-
         sys_config = cls._add_integrated_repo(sys_config)
         sys_config = cls._process_storage(sys_config, config_mgr)
 
@@ -73,11 +68,12 @@ class DevModeTranslator:
 
         # Add the integrated model repo trac_integrated
 
-        integrated_repo_config =
+        integrated_repo_config = _meta.ResourceDefinition(
+            resourceType=_meta.ResourceType.MODEL_REPOSITORY,
             protocol="integrated",
             properties={})
 
-        sys_config.
+        sys_config.resources["trac_integrated"] = integrated_repo_config
 
         return sys_config
 
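In 0.9.0b2 the integrated model repository becomes a generic resource rather than a dedicated repository entry. A minimal sketch of the new shape, based only on the hunk above (module aliases are assumptions, and the sketch assumes RuntimeConfig default-constructs its maps):

```python
import tracdap.rt.config as cfg
import tracdap.rt.metadata as meta

# The integrated model repo, expressed as a generic resource
# (constructor arguments as shown in the hunk above)
integrated_repo = meta.ResourceDefinition(
    resourceType=meta.ResourceType.MODEL_REPOSITORY,
    protocol="integrated",
    properties={})

sys_config = cfg.RuntimeConfig()
sys_config.resources["trac_integrated"] = integrated_repo
```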
@@ -86,23 +82,17 @@ class DevModeTranslator:
             cls, sys_config: _cfg.RuntimeConfig,
             config_mgr: _cfg_p.ConfigManager):
 
-
-
-        for bucket_key, bucket_config in storage_config.buckets.items():
-            storage_config.buckets[bucket_key] = cls._resolve_storage_location(
-                bucket_key, bucket_config, config_mgr)
-
-        for bucket_key, bucket_config in storage_config.external.items():
-            storage_config.external[bucket_key] = cls._resolve_storage_location(
-                bucket_key, bucket_config, config_mgr)
+        sys_config.properties[_cfg_p.ConfigKeys.STORAGE_DEFAULT_LAYOUT] = _meta.StorageLayout.DEVELOPER_LAYOUT.name
 
-
-
+        for resource_key, resource in sys_config.resources.items():
+            if resource.resourceType in [_meta.ResourceType.INTERNAL_STORAGE, _meta.ResourceType.EXTERNAL_STORAGE]:
+                sys_config.resources[resource_key] = cls._resolve_storage_location(
+                    resource_key, resource, config_mgr)
 
         return sys_config
 
     @classmethod
-    def _resolve_storage_location(cls, bucket_key, bucket_config, config_mgr: _cfg_p.ConfigManager):
+    def _resolve_storage_location(cls, bucket_key, bucket_config: _meta.ResourceDefinition, config_mgr: _cfg_p.ConfigManager):
 
         if bucket_config.protocol != "LOCAL":
             return bucket_config
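Storage translation follows the same pattern: instead of iterating storage.buckets and storage.external, dev mode walks the unified resources map and filters on resource type, and it pins the default storage layout via a plain string property. A hedged sketch of the equivalent logic (names per the hunk above; the imports and default construction are assumptions):

```python
import tracdap.rt.config as cfg
import tracdap.rt.metadata as meta
from tracdap.rt._impl.core import config_parser as cfg_p  # internal module, per the diff

sys_config = cfg.RuntimeConfig()

# Dev mode pins the developer storage layout as a plain string property
sys_config.properties[cfg_p.ConfigKeys.STORAGE_DEFAULT_LAYOUT] = \
    meta.StorageLayout.DEVELOPER_LAYOUT.name

# Internal and external storage are now just resource types in one unified map
storage_types = (meta.ResourceType.INTERNAL_STORAGE, meta.ResourceType.EXTERNAL_STORAGE)
storage_resources = {
    key: res for key, res in sys_config.resources.items()
    if res.resourceType in storage_types}
```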
@@ -166,6 +156,9 @@ class DevModeTranslator:
             job_config, job_def = self.translate_job_def(job_config, job_def, model_class)
             job_config.job = job_def
 
+            # Include some basic tags and attributes in the generated metadata
+            job_config = self.generate_dev_mode_tags(job_config)
+
             return job_config
 
         finally:
@@ -218,27 +211,31 @@ class DevModeTranslator:
         return job_config, job_def
 
     @classmethod
-    def
+    def _add_job_metadata(
             cls, job_config: _cfg.JobConfig,
             obj_id: _meta.TagHeader, obj: _meta.ObjectDefinition) \
             -> _cfg.JobConfig:
 
         obj_key = _util.object_key(obj_id)
-
+
+        job_config.objectMapping[obj_key] = obj_id
+        job_config.objects[obj_key] = obj
 
         return job_config
 
     @classmethod
-    def _process_job_id(cls, job_config: _cfg.JobConfig):
+    def _process_job_id(cls, job_config: _cfg.JobConfig) -> _cfg.JobConfig:
 
         job_id = _util.new_object_id(_meta.ObjectType.JOB)
+        result_id = _util.new_object_id(_meta.ObjectType.RESULT)
 
         cls._log.info(f"Assigning job ID = [{_util.object_key(job_id)}]")
+        cls._log.info(f"Assigning result ID = [{_util.object_key(result_id)}]")
 
-
-
+        job_config.jobId = job_id
+        job_config.resultId = result_id
 
-        return
+        return job_config
 
     @classmethod
     def _process_job_type(cls, job_def: _meta.JobDefinition):
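The net effect on JobConfig: generated objects are tracked in two parallel maps, and every dev-mode job gets a pre-allocated result ID alongside its job ID. A sketch of the fields involved (field names are taken from the hunks above; everything else, including default construction of the maps, is an assumption):

```python
import tracdap.rt.config as cfg
import tracdap.rt.metadata as meta
import tracdap.rt._impl.core.util as util  # internal helpers used by the diff

job_config = cfg.JobConfig()
job_config.jobId = util.new_object_id(meta.ObjectType.JOB)
job_config.resultId = util.new_object_id(meta.ObjectType.RESULT)  # new in 0.9.x

# For each generated definition, key -> ID and key -> object are kept in step
model_id = util.new_object_id(meta.ObjectType.MODEL)
model_obj = meta.ObjectDefinition(objectType=meta.ObjectType.MODEL)

obj_key = util.object_key(model_id)
job_config.objectMapping[obj_key] = model_id   # TagHeader
job_config.objects[obj_key] = model_obj        # ObjectDefinition
```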
@@ -346,7 +343,7 @@ class DevModeTranslator:
 
                 model_id, model_obj = self._generate_model_for_class(model_class)
                 job_detail.model = _util.selector_for(model_id)
-                job_config = self.
+                job_config = self._add_job_metadata(job_config, model_id, model_obj)
 
             # Otherwise look for models specified as a single string, and take that as the entry point
             else:
@@ -355,7 +352,7 @@ class DevModeTranslator:
             if hasattr(job_detail, "model") and isinstance(job_detail.model, str):
                 model_id, model_obj = self._generate_model_for_entry_point(job_detail.model)  # noqa
                 job_detail.model = _util.selector_for(model_id)
-                job_config = self.
+                job_config = self._add_job_metadata(job_config, model_id, model_obj)
 
         elif hasattr(job_detail, "model") and isinstance(job_detail.model, _meta.TagSelector):
             if job_detail.model.objectType == _meta.ObjectType.OBJECT_TYPE_NOT_SET:
@@ -369,7 +366,7 @@ class DevModeTranslator:
                 if isinstance(model_detail, str):
                     model_id, model_obj = self._generate_model_for_entry_point(model_detail)
                     job_detail.models[model_key] = _util.selector_for(model_id)
-                    job_config = self.
+                    job_config = self._add_job_metadata(job_config, model_id, model_obj)
 
         return job_config, job_def
 
@@ -446,8 +443,8 @@ class DevModeTranslator:
         job_def.runFlow.flow = _util.selector_for(flow_id)
 
         job_config = copy.copy(job_config)
-        job_config.
-        job_config = self.
+        job_config.objects = copy.copy(job_config.objects)
+        job_config = self._add_job_metadata(job_config, flow_id, flow_obj)
 
         return job_config, job_def
 
@@ -472,7 +469,7 @@ class DevModeTranslator:
         for model_name, model_node in model_nodes.items():
 
             model_selector = job_def.runFlow.models[model_name]
-            model_obj = _util.
+            model_obj = _util.get_job_metadata(model_selector, job_config)
 
             model_inputs = set(model_obj.model.inputs.keys())
             model_outputs = set(model_obj.model.outputs.keys())
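Call sites that previously read generated objects straight off the job config now go through a single helper, _util.get_job_metadata. The helper's body is not shown in this diff; a plausible reading, based purely on how it is called in these hunks, is a selector-to-object lookup along these lines (hypothetical implementation, not the actual util.py code):

```python
def get_job_metadata(selector, job_config):
    # Hypothetical sketch only: resolve a TagSelector against the job-local
    # object map populated by _add_job_metadata (see the earlier hunks)
    obj_key = object_key(selector)
    return job_config.objects[obj_key]
```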
@@ -540,7 +537,7 @@ class DevModeTranslator:
             # Generate node param sockets needed by the model
             if node_name in job.models:
                 model_selector = job.models[node_name]
-                model_obj = _util.
+                model_obj = _util.get_job_metadata(model_selector, job_config)
                 for param_name in model_obj.model.parameters:
                     add_param_to_flow(node_name, param_name)
                     if param_name not in node.parameters:
@@ -622,7 +619,7 @@ class DevModeTranslator:
         for target in targets:
 
             model_selector = job_def.runFlow.models.get(target.node)
-            model_obj = _util.
+            model_obj = _util.get_job_metadata(model_selector, job_config)
             model_param = model_obj.model.parameters.get(target.socket)
             model_params.append(model_param)
 
@@ -659,7 +656,7 @@ class DevModeTranslator:
         for target in targets:
 
             model_selector = job_def.runFlow.models.get(target.node)
-            model_obj = _util.
+            model_obj = _util.get_job_metadata(model_selector, job_config)
             model_input = model_obj.model.inputs.get(target.socket)
             model_inputs.append(model_input)
 
@@ -694,7 +691,7 @@ class DevModeTranslator:
         for source in sources:
 
             model_selector = job_def.runFlow.models.get(source.node)
-            model_obj = _util.
+            model_obj = _util.get_job_metadata(model_selector, job_config)
             model_input = model_obj.model.outputs.get(source.socket)
             model_outputs.append(model_input)
 
@@ -727,10 +724,10 @@ class DevModeTranslator:
 
         if hasattr(job_detail, "model"):
             model_key = _util.object_key(job_detail.model)
-            model_or_flow = job_config.
+            model_or_flow = job_config.objects[model_key].model
         elif hasattr(job_detail, "flow"):
             flow_key = _util.object_key(job_detail.flow)
-            model_or_flow = job_config.
+            model_or_flow = job_config.objects[flow_key].flow
         else:
             model_or_flow = None
 
@@ -784,71 +781,67 @@ class DevModeTranslator:
         job_detail = self._get_job_detail(job_def)
 
         if hasattr(job_detail, "model"):
-            model_obj = _util.
+            model_obj = _util.get_job_metadata(job_detail.model, job_config)
             required_inputs = model_obj.model.inputs
-
+            expected_outputs = model_obj.model.outputs
 
         elif hasattr(job_detail, "flow"):
-            flow_obj = _util.
+            flow_obj = _util.get_job_metadata(job_detail.flow, job_config)
             required_inputs = flow_obj.flow.inputs
-
+            expected_outputs = flow_obj.flow.outputs
 
         else:
             return job_config, job_def
 
         job_inputs = job_detail.inputs
         job_outputs = job_detail.outputs
-
-
-        for
-        if not
-
-
-
-
-
-
-
-
-
-
-
-
+        job_prior_outputs = job_detail.priorOutputs
+
+        for key, schema in required_inputs.items():
+            if key not in job_inputs:
+                if not schema.optional:
+                    raise _ex.EJobValidation(f"Missing required input [{key}]")
+                continue
+            supplied_input = job_inputs.pop(key) if key in job_inputs else None
+            input_selector = self._process_socket(key, schema, supplied_input, job_config, is_output=False)
+            if input_selector is not None:
+                job_inputs[key] = input_selector
+
+        for key, schema in expected_outputs.items():
+            if key not in job_outputs:
+                raise _ex.EJobValidation(f"Missing required output [{key}]")
+            supplied_output = job_outputs.pop(key)
+            output_selector = self._process_socket(key, schema, supplied_output, job_config, is_output=True)
+            if output_selector is not None:
+                job_prior_outputs[key] = output_selector
 
-
-        if not (isinstance(output_value, str) and output_value in job_resources):
+        return job_config, job_def
 
-
+    def _process_socket(self, key, socket, supplied_value, job_config, is_output) -> _meta.TagSelector:
 
-
-
-
-        elif model_output.objectType == _meta.ObjectType.FILE:
-            file_type = model_output.fileType
-            output_id = self._process_file_socket(output_key, output_value, file_type, job_resources, new_unique_file=True)
-        else:
-            raise _ex.EUnexpected()
+        if socket.objectType == _meta.ObjectType.DATA:
+            schema = socket.schema if socket and not socket.dynamic else None
+            return self._process_data_socket(key, supplied_value, schema, job_config, is_output)
 
-
+        elif socket.objectType == _meta.ObjectType.FILE:
+            file_type = socket.fileType
+            return self._process_file_socket(key, supplied_value, file_type, job_config, is_output)
 
-
+        else:
+            raise _ex.EUnexpected()
 
     def _process_data_socket(
             self, data_key, data_value, schema: tp.Optional[_meta.SchemaDefinition],
-
-            -> _meta.
+            job_config: _cfg.JobConfig, is_output: bool)\
+            -> _meta.TagSelector:
 
         data_id = _util.new_object_id(_meta.ObjectType.DATA)
         storage_id = _util.new_object_id(_meta.ObjectType.STORAGE)
 
-        self._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
-
         if isinstance(data_value, str):
             storage_path = data_value
-            storage_key = self._sys_config.
-            storage_format = self.infer_format(storage_path, self._sys_config
-            snap_version = 1
+            storage_key = _util.read_property(self._sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)
+            storage_format = self.infer_format(storage_path, self._sys_config, schema)
 
         elif isinstance(data_value, dict):
 
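_process_data_socket still accepts the two dev-mode value forms for inputs and outputs: a bare storage path, or a dict carrying explicit storage settings. An illustrative job-config fragment (paths and storage keys are hypothetical; the accepted keys follow the hunks above and below):

```python
job_inputs = {
    # String form: path only, storage key and format are inferred
    "customer_loans": "inputs/loan_data.csv",
    # Dict form: path plus optional storageKey / format overrides
    "currency_data": {
        "path": "inputs/currency_data.csv",
        "storageKey": "example_data",
        "format": "CSV",
    },
}
```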
@@ -857,58 +850,65 @@ class DevModeTranslator:
             if not storage_path:
                 raise _ex.EConfigParse(f"Invalid configuration for input [{data_key}] (missing required value 'path'")
 
-            storage_key = data_value.get("storageKey") or self._sys_config.
-            storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config
-            snap_version = 1
+            storage_key = data_value.get("storageKey") or _util.read_property(self._sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)
+            storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config, schema)
 
         else:
             raise _ex.EConfigParse(f"Invalid configuration for input '{data_key}'")
 
-        #
-
-
+        # Scan for existing versions using hte DEVELOPER storage layout
+
+        self._log.info(f"Looking for {'output' if is_output else 'input'} [{data_key}]...")
 
-
-
+        storage_path, version = self._find_latest_version(storage_key, storage_path)
+        data_id.objectVersion = version
+
+        if version > 0:
+            self._log.info(f"Found {'output' if is_output else 'input'} [{data_key}] version {version}")
+            self._log.info(f"Generating {'prior' if is_output else 'data'} definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
+        elif is_output:
+            self._log.info(f"No prior data for output [{data_key}]")
+        else:
+            # This is allowed for some scenarios, e.g. inside a job group
+            self._log.warning(f"No data found for input [{data_key}]")
 
         part_key = _meta.PartKey(opaqueKey="part-root", partType=_meta.PartType.PART_ROOT)
-
-
+        snap_index = version - 1 if version > 0 else 0
+        delta_index = 0
+        incarnation_index = 0
 
         # This is also defined in functions.DynamicDataSpecFunc, maybe centralize?
-        data_item = f"data/table/{data_id.objectId}/{part_key.opaqueKey}/snap-{
+        data_item = f"data/table/{data_id.objectId}/{part_key.opaqueKey}/snap-{snap_index}/delta-{delta_index}"
 
         data_obj = self._generate_data_definition(
-            part_key,
+            part_key, snap_index, delta_index, data_item,
             schema, storage_id)
 
         storage_obj = self._generate_storage_definition(
             storage_id, storage_key, storage_path, storage_format,
             data_item, incarnation_index)
 
-
-
+        self._add_job_metadata(job_config, data_id, data_obj)
+        self._add_job_metadata(job_config, storage_id, storage_obj)
 
-        return data_id
+        return _util.selector_for(data_id)
 
     def _process_file_socket(
             self, file_key, file_value, file_type: _meta.FileType,
-
-            -> _meta.
+            job_config: _cfg.JobConfig, is_output: bool) \
+            -> tp.Optional[_meta.TagSelector]:
 
         file_id = _util.new_object_id(_meta.ObjectType.FILE)
         storage_id = _util.new_object_id(_meta.ObjectType.STORAGE)
 
-        self._log.info(f"Generating file definition for [{file_key}] with ID = [{_util.object_key(file_id)}]")
-
         if isinstance(file_value, str):
 
-            storage_key = self._sys_config.
+            storage_key = _util.read_property(self._sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)
             storage_path = file_value
 
         elif isinstance(file_value, dict):
 
-            storage_key = file_value.get("storageKey") or self._sys_config.
+            storage_key = file_value.get("storageKey") or _util.read_property(self._sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)
             storage_path = file_value.get("path")
 
         if not storage_path:
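The developer layout maps data versions onto snap indices: version N of a dataset lands in snap N-1, with a single delta. A quick worked example of the data-item path above (the object ID is hypothetical):

```python
object_id = "123e4567-e89b-12d3-a456-426614174000"  # hypothetical DATA object ID
part_key = "part-root"

version = 3                                      # third copy found on disk
snap_index = version - 1 if version > 0 else 0   # -> 2
delta_index = 0

data_item = f"data/table/{object_id}/{part_key}/snap-{snap_index}/delta-{delta_index}"
print(data_item)  # data/table/123e4567-e89b-12d3-a456-426614174000/part-root/snap-2/delta-0
```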
@@ -917,17 +917,28 @@ class DevModeTranslator:
         else:
             raise _ex.EConfigParse(f"Invalid configuration for input '{file_key}'")
 
-
-
+        # Scan for existing versions using hte DEVELOPER storage layout
+
+        self._log.info(f"Looking for {'output' if is_output else 'input'} [{file_key}]...")
 
-
-
-
+        storage_path, version = self._find_latest_version(storage_key, storage_path)
+        file_id.objectVersion = version
+
+        if version > 0:
+            self._log.info(f"Found {'output' if is_output else 'input'} [{file_key}] version {version}")
+            self._log.info(f"Generating {'prior' if is_output else 'file'} definition for [{file_key}] with ID = [{_util.object_key(file_id)}]")
+        elif is_output:
+            self._log.info(f"No prior data for output [{file_key}]")
         else:
-
-
+            # This is allowed for some scenarios, e.g. inside a job group
+            self._log.warning(f"No data found for input [{file_key}]")
+
+        storage = self._storage_manager.get_file_storage(storage_key)
+        file_size = storage.size(storage_path) if storage.exists(storage_path) else 0
 
-
+        storage_format = "application/x-binary"
+
+        data_item = f"file/{file_id.objectId}/version-{version}"
         file_name = f"{file_key}.{file_type.extension}"
 
         file_obj = self._generate_file_definition(
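Files get the same version-aware treatment, with a simpler item naming scheme and a generic binary storage format. A worked example of the naming above (the object ID is hypothetical):

```python
file_object_id = "123e4567-e89b-12d3-a456-426614174000"  # hypothetical FILE object ID
version = 2

storage_format = "application/x-binary"
data_item = f"file/{file_object_id}/version-{version}"
print(data_item)  # file/123e4567-e89b-12d3-a456-426614174000/version-2
```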
@@ -936,15 +947,15 @@ class DevModeTranslator:
 
         storage_obj = self._generate_storage_definition(
             storage_id, storage_key, storage_path, storage_format,
-            data_item, incarnation_index=
+            data_item, incarnation_index=0)
 
-
-
+        self._add_job_metadata(job_config, file_id, file_obj)
+        self._add_job_metadata(job_config, storage_id, storage_obj)
 
-        return file_id
+        return _util.selector_for(file_id)
 
     @staticmethod
-    def infer_format(storage_path: str,
+    def infer_format(storage_path: str, sys_config: _cfg.RuntimeConfig, schema: tp.Optional[_meta.SchemaDefinition]):
 
         schema_type = schema.schemaType if schema and schema.schemaType else _meta.SchemaType.TABLE
 
@@ -958,27 +969,30 @@ class DevModeTranslator:
             return extension[1:] if extension.startswith(".") else extension
 
         else:
-            return
+            return _util.read_property(sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_FORMAT, "CSV")
 
-    def
+    def _find_latest_version(self, storage_key, storage_path):
 
-
-
-
+        storage = self._storage_manager.get_file_storage(storage_key)
+        orig_path = pathlib.PurePath(storage_path)
+        version = 0
 
-        if
-
-            existing_files = list(map(lambda stat: stat.file_name, listing))
-        else:
-            existing_files = []
+        if not storage.exists(str(orig_path.parent)):
+            return storage_path, version
 
-
+        listing = storage.ls(str(orig_path.parent))
+        existing_files = list(map(lambda stat: stat.file_name, listing))
 
-
-
-            storage_path = str(x_orig_path.parent.joinpath(x_name))
+        next_version = version + 1
+        next_name = f"{orig_path.stem}{orig_path.suffix}"
 
-
+        while next_name in existing_files:
+
+            storage_path = str(orig_path.parent.joinpath(next_name))
+            version = next_version
+
+            next_version = version + 1
+            next_name = f"{orig_path.stem}-{next_version}{orig_path.suffix}"
 
         return storage_path, version
 
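_find_latest_version probes the developer layout's naming convention: the base name counts as version 1, and later versions append -2, -3, and so on. A standalone re-implementation of the loop, using a plain list of file names in place of a TRAC storage instance, shows the behaviour:

```python
import pathlib

def find_latest_version_local(existing_files, storage_path):
    # Mirrors the loop in the hunk above, without the storage manager
    orig_path = pathlib.PurePosixPath(storage_path)
    version = 0
    next_version = version + 1
    next_name = f"{orig_path.stem}{orig_path.suffix}"
    while next_name in existing_files:
        storage_path = str(orig_path.parent.joinpath(next_name))
        version = next_version
        next_version = version + 1
        next_name = f"{orig_path.stem}-{next_version}{orig_path.suffix}"
    return storage_path, version

# Base name is version 1, "-2" suffix is version 2, and so on
print(find_latest_version_local(["report.csv", "report-2.csv"], "inputs/report.csv"))
# -> ('inputs/report-2.csv', 2)
```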
@@ -1043,12 +1057,40 @@ class DevModeTranslator:
 
         storage_def = _meta.StorageDefinition()
         storage_def.dataItems[data_item] = storage_item
+        storage_def.layout = _meta.StorageLayout.DEVELOPER_LAYOUT
 
         if storage_format.lower() == "csv":
             storage_def.storageOptions["lenient_csv_parser"] = _types.MetadataCodec.encode_value(True)
 
         return _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
 
+    @classmethod
+    def generate_dev_mode_tags(cls, job_config: _cfg.JobConfig) -> _cfg.JobConfig:
+
+        job_config = copy.copy(job_config)
+
+        for key, object_id in job_config.objectMapping.items():
+            if key not in job_config.tags:
+
+                tag = _meta.Tag(header=object_id)
+
+                timestamp = _types.MetadataCodec.convert_datetime_value(object_id.objectTimestamp.isoDatetime)
+                user_id = _types.MetadataCodec.encode_value("local_user")
+                user_name = _types.MetadataCodec.encode_value("Local User")
+
+                tag.attrs["trac_dev_mode"] = _types.MetadataCodec.encode_value(True)
+
+                tag.attrs["trac_create_time"] = timestamp
+                tag.attrs["trac_create_user_id"] = user_id
+                tag.attrs["trac_create_user_name"] = user_name
+
+                tag.attrs["trac_update_time"] = timestamp
+                tag.attrs["trac_update_user_id"] = user_id
+                tag.attrs["trac_update_user_name"] = user_name
+
+                job_config.tags[key] = tag
+
+        return job_config
 
 
 DevModeTranslator._log = _logging.logger_for_class(DevModeTranslator)
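generate_dev_mode_tags back-fills the audit attributes that the TRAC platform would normally stamp on real metadata, and marks everything with trac_dev_mode so locally generated objects are easy to spot. Reading one back might look like this (a sketch only: "types" stands in for the _types module imported by dev_mode.py, and decode_value is assumed to be the read-side counterpart of the encode_value calls above):

```python
# Hedged sketch: decoding the generated dev-mode attrs from a tag
tag = job_config.tags[obj_key]

is_dev_mode = types.MetadataCodec.decode_value(tag.attrs["trac_dev_mode"])        # True
create_user = types.MetadataCodec.decode_value(tag.attrs["trac_create_user_id"])  # "local_user"
```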