tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. tracdap/rt/_impl/core/data.py +578 -33
  2. tracdap/rt/_impl/core/repos.py +7 -0
  3. tracdap/rt/_impl/core/storage.py +10 -3
  4. tracdap/rt/_impl/core/util.py +54 -11
  5. tracdap/rt/_impl/exec/dev_mode.py +122 -100
  6. tracdap/rt/_impl/exec/engine.py +178 -109
  7. tracdap/rt/_impl/exec/functions.py +218 -257
  8. tracdap/rt/_impl/exec/graph.py +140 -125
  9. tracdap/rt/_impl/exec/graph_builder.py +411 -449
  10. tracdap/rt/_impl/grpc/codec.py +4 -2
  11. tracdap/rt/_impl/grpc/server.py +7 -7
  12. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
  13. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
  14. tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
  15. tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
  16. tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
  17. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +1 -1
  18. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
  19. tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
  20. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
  21. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
  22. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
  23. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
  24. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
  25. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
  26. tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
  27. tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
  28. tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
  29. tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
  30. tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
  31. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +1 -1
  32. tracdap/rt/_impl/runtime.py +8 -0
  33. tracdap/rt/_plugins/repo_git.py +56 -11
  34. tracdap/rt/_version.py +1 -1
  35. tracdap/rt/config/__init__.py +6 -6
  36. tracdap/rt/config/common.py +5 -0
  37. tracdap/rt/config/job.py +13 -3
  38. tracdap/rt/config/result.py +8 -4
  39. tracdap/rt/config/runtime.py +2 -0
  40. tracdap/rt/metadata/__init__.py +37 -36
  41. tracdap/rt/metadata/job.py +2 -0
  42. tracdap/rt/metadata/storage.py +9 -0
  43. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/METADATA +3 -1
  44. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/RECORD +47 -47
  45. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/WHEEL +1 -1
  46. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/licenses/LICENSE +0 -0
  47. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/top_level.txt +0 -0
@@ -38,6 +38,13 @@ class RepositoryManager:

  try:

+ # Add global properties related to the repo protocol
+ related_props = {
+ k: v for (k, v) in sys_config.properties.items()
+ if k.startswith(f"{repo_config.protocol}.")}
+
+ repo_config.properties.update(related_props)
+
  self._repos[repo_name] = plugins.PluginManager.load_plugin(IModelRepository, repo_config)

  except ex.EPluginNotAvailable as e:
@@ -81,7 +81,7 @@ class StorageManager:
  self.__file_storage: tp.Dict[str, IFileStorage] = dict()
  self.__data_storage: tp.Dict[str, IDataStorage] = dict()
  self.__external: tp.List[str] = list()
- self.__settings = sys_config.storage
+ self.__sys_config = sys_config

  for storage_key, storage_config in sys_config.storage.buckets.items():
  self.create_storage(storage_key, storage_config)
@@ -93,13 +93,20 @@ class StorageManager:
  self.create_storage(storage_key, storage_config)

  def default_storage_key(self):
- return self.__settings.defaultBucket
+ return self.__sys_config.storage.defaultBucket

  def default_storage_format(self):
- return self.__settings.defaultFormat
+ return self.__sys_config.storage.defaultFormat

  def create_storage(self, storage_key: str, storage_config: _cfg.PluginConfig):

+ # Add global properties related to the storage protocol
+ related_props = {
+ k: v for (k, v) in self.__sys_config.properties.items()
+ if k.startswith(f"{storage_config.protocol}.")}
+
+ storage_config.properties.update(related_props)
+
  if plugins.PluginManager.is_plugin_available(IStorageProvider, storage_config.protocol):
  self._create_storage_from_provider(storage_key, storage_config)
  else:
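
The repos.py and storage.py hunks above apply the same pattern: global properties whose keys are prefixed with a plugin's protocol (for example `git.` or `s3.`) are copied into that plugin's own config before the plugin is loaded. A minimal, self-contained sketch of the filtering step; the dict-based config here is illustrative, not the real tracdap config classes:

```python
# Sketch of the protocol-scoped property merge used in both hunks above.
# Plain dicts stand in for tracdap's PluginConfig / RuntimeConfig objects.

def merge_protocol_properties(global_props: dict, protocol: str, plugin_props: dict) -> dict:
    """Copy global properties prefixed with '<protocol>.' into the plugin's properties."""
    related_props = {k: v for k, v in global_props.items() if k.startswith(f"{protocol}.")}
    plugin_props.update(related_props)
    return plugin_props

# A global "git.token" property reaches a repo configured with protocol "git",
# while the unrelated "s3.region" property is left out.
sys_properties = {"git.token": "example-token", "s3.region": "eu-west-1"}
repo_properties = merge_protocol_properties(sys_properties, "git", {})
assert repo_properties == {"git.token": "example-token"}
```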
@@ -16,6 +16,7 @@
  import datetime as dt
  import pathlib
  import platform
+ import re

  import typing as tp
  import uuid
@@ -30,6 +31,7 @@ import traceback as tb
  __IS_WINDOWS = platform.system() == "Windows"
  __FIRST_MODEL_FRAME_NAME = "run_model"
  __FIRST_MODEL_FRAME_TEST_NAME = "_callTestMethod"
+ __OBJ_KEY_PATTERN = re.compile(r"([A-Z]+)-(.*)-v(\d+)")


  def is_windows():
@@ -60,7 +62,7 @@ def format_file_size(size: int) -> str:

  def new_object_id(object_type: meta.ObjectType) -> meta.TagHeader:

- timestamp = dt.datetime.utcnow()
+ timestamp = dt.datetime.now(dt.timezone.utc)

  return meta.TagHeader(
  objectType=object_type,
@@ -71,6 +73,19 @@ def new_object_id(object_type: meta.ObjectType) -> meta.TagHeader:
  tagTimestamp=meta.DatetimeValue(timestamp.isoformat()))


+ def new_object_version(prior_id: meta.TagHeader) -> meta.TagHeader:
+
+ timestamp = dt.datetime.now(dt.timezone.utc)
+
+ return meta.TagHeader(
+ objectType=prior_id.objectType,
+ objectId=prior_id.objectId,
+ objectVersion=prior_id.objectVersion + 1,
+ objectTimestamp=meta.DatetimeValue(timestamp.isoformat()),
+ tagVersion=1,
+ tagTimestamp=meta.DatetimeValue(timestamp.isoformat()))
+
+
  def object_key(object_id: tp.Union[meta.TagHeader, meta.TagSelector]) -> str:

  if isinstance(object_id, meta.TagHeader):
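
The new `new_object_version` helper above builds the header for the next version of an existing object: `objectVersion` is incremented, `tagVersion` resets to 1, and both timestamps are set to the current UTC time. A small sketch of those semantics with a stand-in header class, not the real `meta.TagHeader`:

```python
import dataclasses
import datetime as dt

@dataclasses.dataclass
class Header:  # stand-in for tracdap's meta.TagHeader
    object_id: str
    object_version: int
    tag_version: int
    timestamp: str

def next_object_version(prior: Header) -> Header:
    # New object version, tag numbering starts again at 1
    now = dt.datetime.now(dt.timezone.utc).isoformat()
    return Header(prior.object_id, prior.object_version + 1, 1, now)

v1 = Header("dataset-001", object_version=1, tag_version=3, timestamp="2024-01-01T00:00:00+00:00")
v2 = next_object_version(v1)
assert (v2.object_version, v2.tag_version) == (2, 1)
```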
@@ -106,29 +121,57 @@ def selector_for_latest(object_id: meta.TagHeader) -> meta.TagSelector:
  latestTag=True)


- def get_job_resource(
+ def get_job_metadata(
  selector: tp.Union[meta.TagHeader, meta.TagSelector],
  job_config: cfg.JobConfig,
- optional: bool = False):
+ optional: bool = False) \
+ -> tp.Optional[meta.ObjectDefinition]:

- resource_key = object_key(selector)
- resource_id = job_config.resourceMapping.get(resource_key)
+ obj_key = object_key(selector)
+ obj_id = job_config.objectMapping.get(obj_key)

- if resource_id is not None:
- resource_key = object_key(resource_id)
+ if obj_id is not None:
+ obj_key = object_key(obj_id)

- resource = job_config.resources.get(resource_key)
+ obj = job_config.objects.get(obj_key)

- if resource is not None:
- return resource
+ if obj is not None:
+ return obj

  if optional:
  return None

- err = f"Missing required {selector.objectType.name} resource [{object_key(selector)}]"
+ err = f"Missing required {selector.objectType.name} object for [{object_key(selector)}]"
  raise ex.ERuntimeValidation(err)


+ def get_job_mapping(
+ selector: tp.Union[meta.TagHeader, meta.TagSelector],
+ job_config: cfg.JobConfig) \
+ -> meta.TagHeader:
+
+ obj_key = object_key(selector)
+ obj_id = job_config.objectMapping.get(obj_key)
+
+ if obj_id is not None:
+ return obj_id
+
+ obj_key_match = __OBJ_KEY_PATTERN.match(obj_key)
+
+ if not obj_key_match:
+ err = f"Missing required {selector.objectType.name} ID for [{object_key(selector)}]"
+ raise ex.ERuntimeValidation(err)
+
+ obj_type = obj_key_match.group(1)
+ obj_id = obj_key_match.group(2)
+ obj_ver = obj_key_match.group(3)
+ obj_ts = job_config.jobId.objectTimestamp
+
+ return meta.TagHeader(
+ meta.ObjectType.__members__[obj_type], obj_id,
+ int(obj_ver), obj_ts, 1, obj_ts)
+
+
  def get_origin(metaclass: type):

  # Minimum supported Python is 3.7, which does not provide get_origin and get_args
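
When no explicit entry exists in `objectMapping`, the new `get_job_mapping` helper falls back to parsing the object key itself, which `__OBJ_KEY_PATTERN` expects in the form `<OBJECT_TYPE>-<object-id>-v<version>`. A standalone sketch of that parse; the key value is made up for illustration:

```python
import re

# Same pattern as __OBJ_KEY_PATTERN in the hunk above
OBJ_KEY_PATTERN = re.compile(r"([A-Z]+)-(.*)-v(\d+)")

def parse_object_key(obj_key: str):
    """Split a key like 'DATA-<object-id>-v2' into (object type, object id, version)."""
    match = OBJ_KEY_PATTERN.match(obj_key)
    if not match:
        raise ValueError(f"Not a valid object key: [{obj_key}]")
    return match.group(1), match.group(2), int(match.group(3))

obj_type, obj_id, obj_version = parse_object_key(
    "DATA-2f9a1c3e-7b44-4a31-9c55-0d8e8d2f6a10-v2")
assert obj_type == "DATA" and obj_version == 2
```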
@@ -87,6 +87,7 @@ class DevModeTranslator:
  config_mgr: _cfg_p.ConfigManager):

  storage_config = copy.deepcopy(sys_config.storage)
+ storage_config.defaultLayout = _meta.StorageLayout.DEVELOPER_LAYOUT

  for bucket_key, bucket_config in storage_config.buckets.items():
  storage_config.buckets[bucket_key] = cls._resolve_storage_location(
@@ -218,27 +219,29 @@ class DevModeTranslator:
  return job_config, job_def

  @classmethod
- def _add_job_resource(
+ def _add_job_metadata(
  cls, job_config: _cfg.JobConfig,
  obj_id: _meta.TagHeader, obj: _meta.ObjectDefinition) \
  -> _cfg.JobConfig:

  obj_key = _util.object_key(obj_id)
- job_config.resources[obj_key] = obj
+ job_config.objects[obj_key] = obj

  return job_config

  @classmethod
- def _process_job_id(cls, job_config: _cfg.JobConfig):
+ def _process_job_id(cls, job_config: _cfg.JobConfig) -> _cfg.JobConfig:

  job_id = _util.new_object_id(_meta.ObjectType.JOB)
+ result_id = _util.new_object_id(_meta.ObjectType.RESULT)

  cls._log.info(f"Assigning job ID = [{_util.object_key(job_id)}]")
+ cls._log.info(f"Assigning result ID = [{_util.object_key(result_id)}]")

- translated_config = copy.copy(job_config)
- translated_config.jobId = job_id
+ job_config.jobId = job_id
+ job_config.resultId = result_id

- return translated_config
+ return job_config

  @classmethod
  def _process_job_type(cls, job_def: _meta.JobDefinition):
@@ -346,7 +349,7 @@ class DevModeTranslator:

  model_id, model_obj = self._generate_model_for_class(model_class)
  job_detail.model = _util.selector_for(model_id)
- job_config = self._add_job_resource(job_config, model_id, model_obj)
+ job_config = self._add_job_metadata(job_config, model_id, model_obj)

  # Otherwise look for models specified as a single string, and take that as the entry point
  else:
@@ -355,7 +358,7 @@ class DevModeTranslator:
  if hasattr(job_detail, "model") and isinstance(job_detail.model, str):
  model_id, model_obj = self._generate_model_for_entry_point(job_detail.model) # noqa
  job_detail.model = _util.selector_for(model_id)
- job_config = self._add_job_resource(job_config, model_id, model_obj)
+ job_config = self._add_job_metadata(job_config, model_id, model_obj)

  elif hasattr(job_detail, "model") and isinstance(job_detail.model, _meta.TagSelector):
  if job_detail.model.objectType == _meta.ObjectType.OBJECT_TYPE_NOT_SET:
@@ -369,7 +372,7 @@ class DevModeTranslator:
  if isinstance(model_detail, str):
  model_id, model_obj = self._generate_model_for_entry_point(model_detail)
  job_detail.models[model_key] = _util.selector_for(model_id)
- job_config = self._add_job_resource(job_config, model_id, model_obj)
+ job_config = self._add_job_metadata(job_config, model_id, model_obj)

  return job_config, job_def

@@ -446,8 +449,8 @@ class DevModeTranslator:
  job_def.runFlow.flow = _util.selector_for(flow_id)

  job_config = copy.copy(job_config)
- job_config.resources = copy.copy(job_config.resources)
- job_config = self._add_job_resource(job_config, flow_id, flow_obj)
+ job_config.objects = copy.copy(job_config.objects)
+ job_config = self._add_job_metadata(job_config, flow_id, flow_obj)

  return job_config, job_def

@@ -472,7 +475,7 @@ class DevModeTranslator:
  for model_name, model_node in model_nodes.items():

  model_selector = job_def.runFlow.models[model_name]
- model_obj = _util.get_job_resource(model_selector, job_config)
+ model_obj = _util.get_job_metadata(model_selector, job_config)

  model_inputs = set(model_obj.model.inputs.keys())
  model_outputs = set(model_obj.model.outputs.keys())
@@ -540,7 +543,7 @@ class DevModeTranslator:
  # Generate node param sockets needed by the model
  if node_name in job.models:
  model_selector = job.models[node_name]
- model_obj = _util.get_job_resource(model_selector, job_config)
+ model_obj = _util.get_job_metadata(model_selector, job_config)
  for param_name in model_obj.model.parameters:
  add_param_to_flow(node_name, param_name)
  if param_name not in node.parameters:
@@ -622,7 +625,7 @@ class DevModeTranslator:
  for target in targets:

  model_selector = job_def.runFlow.models.get(target.node)
- model_obj = _util.get_job_resource(model_selector, job_config)
+ model_obj = _util.get_job_metadata(model_selector, job_config)
  model_param = model_obj.model.parameters.get(target.socket)
  model_params.append(model_param)

@@ -659,7 +662,7 @@ class DevModeTranslator:
  for target in targets:

  model_selector = job_def.runFlow.models.get(target.node)
- model_obj = _util.get_job_resource(model_selector, job_config)
+ model_obj = _util.get_job_metadata(model_selector, job_config)
  model_input = model_obj.model.inputs.get(target.socket)
  model_inputs.append(model_input)

@@ -694,7 +697,7 @@ class DevModeTranslator:
  for source in sources:

  model_selector = job_def.runFlow.models.get(source.node)
- model_obj = _util.get_job_resource(model_selector, job_config)
+ model_obj = _util.get_job_metadata(model_selector, job_config)
  model_input = model_obj.model.outputs.get(source.socket)
  model_outputs.append(model_input)

@@ -727,10 +730,10 @@ class DevModeTranslator:

  if hasattr(job_detail, "model"):
  model_key = _util.object_key(job_detail.model)
- model_or_flow = job_config.resources[model_key].model
+ model_or_flow = job_config.objects[model_key].model
  elif hasattr(job_detail, "flow"):
  flow_key = _util.object_key(job_detail.flow)
- model_or_flow = job_config.resources[flow_key].flow
+ model_or_flow = job_config.objects[flow_key].flow
  else:
  model_or_flow = None

@@ -784,71 +787,68 @@ class DevModeTranslator:
  job_detail = self._get_job_detail(job_def)

  if hasattr(job_detail, "model"):
- model_obj = _util.get_job_resource(job_detail.model, job_config)
+ model_obj = _util.get_job_metadata(job_detail.model, job_config)
  required_inputs = model_obj.model.inputs
- required_outputs = model_obj.model.outputs
+ expected_outputs = model_obj.model.outputs

  elif hasattr(job_detail, "flow"):
- flow_obj = _util.get_job_resource(job_detail.flow, job_config)
+ flow_obj = _util.get_job_metadata(job_detail.flow, job_config)
  required_inputs = flow_obj.flow.inputs
- required_outputs = flow_obj.flow.outputs
+ expected_outputs = flow_obj.flow.outputs

  else:
  return job_config, job_def

+ job_metadata = job_config.objects
  job_inputs = job_detail.inputs
  job_outputs = job_detail.outputs
- job_resources = job_config.resources
-
- for input_key, input_value in job_inputs.items():
- if not (isinstance(input_value, str) and input_value in job_resources):
-
- model_input = required_inputs[input_key]
+ job_prior_outputs = job_detail.priorOutputs
+
+ for key, schema in required_inputs.items():
+ if key not in job_inputs:
+ if not schema.optional:
+ raise _ex.EJobValidation(f"Missing required input [{key}]")
+ continue
+ supplied_input = job_inputs.pop(key) if key in job_inputs else None
+ input_selector = self._process_socket(key, schema, supplied_input, job_metadata, is_output=False)
+ if input_selector is not None:
+ job_inputs[key] = input_selector
+
+ for key, schema in expected_outputs.items():
+ if key not in job_outputs:
+ raise _ex.EJobValidation(f"Missing required output [{key}]")
+ supplied_output = job_outputs.pop(key)
+ output_selector = self._process_socket(key, schema, supplied_output, job_metadata, is_output=True)
+ if output_selector is not None:
+ job_prior_outputs[key] = output_selector

- if model_input.objectType == _meta.ObjectType.DATA:
- schema = model_input.schema if model_input and not model_input.dynamic else None
- input_id = self._process_data_socket(input_key, input_value, schema, job_resources, new_unique_file=False)
- elif model_input.objectType == _meta.ObjectType.FILE:
- file_type = model_input.fileType
- input_id = self._process_file_socket(input_key, input_value, file_type, job_resources, new_unique_file=False)
- else:
- raise _ex.EUnexpected()
-
- job_inputs[input_key] = _util.selector_for(input_id)
-
- for output_key, output_value in job_outputs.items():
- if not (isinstance(output_value, str) and output_value in job_resources):
+ return job_config, job_def

- model_output = required_outputs[output_key]
+ def _process_socket(self, key, socket, supplied_value, job_metadata, is_output) -> _meta.TagSelector:

- if model_output.objectType == _meta.ObjectType.DATA:
- schema = model_output.schema if model_output and not model_output.dynamic else None
- output_id = self._process_data_socket(output_key, output_value, schema, job_resources, new_unique_file=True)
- elif model_output.objectType == _meta.ObjectType.FILE:
- file_type = model_output.fileType
- output_id = self._process_file_socket(output_key, output_value, file_type, job_resources, new_unique_file=True)
- else:
- raise _ex.EUnexpected()
+ if socket.objectType == _meta.ObjectType.DATA:
+ schema = socket.schema if socket and not socket.dynamic else None
+ return self._process_data_socket(key, supplied_value, schema, job_metadata, is_output)

- job_outputs[output_key] = _util.selector_for(output_id)
+ elif socket.objectType == _meta.ObjectType.FILE:
+ file_type = socket.fileType
+ return self._process_file_socket(key, supplied_value, file_type, job_metadata, is_output)

- return job_config, job_def
+ else:
+ raise _ex.EUnexpected()

  def _process_data_socket(
  self, data_key, data_value, schema: tp.Optional[_meta.SchemaDefinition],
- resources: tp.Dict[str, _meta.ObjectDefinition], new_unique_file=False) \
- -> _meta.TagHeader:
+ job_metadata: tp.Dict[str, _meta.ObjectDefinition], is_output: bool)\
+ -> _meta.TagSelector:

  data_id = _util.new_object_id(_meta.ObjectType.DATA)
  storage_id = _util.new_object_id(_meta.ObjectType.STORAGE)

- self._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
-
  if isinstance(data_value, str):
  storage_path = data_value
  storage_key = self._sys_config.storage.defaultBucket
  storage_format = self.infer_format(storage_path, self._sys_config.storage, schema)
- snap_version = 1

  elif isinstance(data_value, dict):

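
The rewritten loop above drives socket processing from the model or flow definition rather than from whatever happens to be in the job config: each required input must be supplied unless it is optional, each expected output must be named, and every supplied value is resolved to a selector via `_process_socket`. A minimal sketch of that validation flow, with plain dicts standing in for TRAC's metadata objects:

```python
# Sketch of the required-input check in the hunk above; dicts and a simple
# resolve() callback stand in for TRAC schemas and _process_socket().

class JobValidationError(Exception):
    pass

def resolve_required_inputs(required_inputs: dict, job_inputs: dict, resolve) -> dict:
    selectors = {}
    for key, schema in required_inputs.items():
        if key not in job_inputs:
            if not schema.get("optional", False):
                raise JobValidationError(f"Missing required input [{key}]")
            continue  # optional input, nothing supplied
        selector = resolve(key, schema, job_inputs[key])
        if selector is not None:
            selectors[key] = selector
    return selectors

required = {"customer_data": {"optional": False}, "reference_data": {"optional": True}}
supplied = {"customer_data": "inputs/customer_data.csv"}
print(resolve_required_inputs(required, supplied, lambda k, s, v: f"selector for {v}"))
```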
@@ -859,48 +859,55 @@ class DevModeTranslator:

  storage_key = data_value.get("storageKey") or self._sys_config.storage.defaultBucket
  storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config.storage, schema)
- snap_version = 1

  else:
  raise _ex.EConfigParse(f"Invalid configuration for input '{data_key}'")

- # For unique outputs, increment the snap number to find a new unique snap
- # These are not incarnations, bc likely in dev mode model code and inputs are changing
- # Incarnations are for recreation of a dataset using the exact same code path and inputs
+ # Scan for existing versions using hte DEVELOPER storage layout
+
+ self._log.info(f"Looking for {'output' if is_output else 'input'} [{data_key}]...")
+
+ storage_path, version = self._find_latest_version(storage_key, storage_path)
+ data_id.objectVersion = version

- if new_unique_file:
- storage_path, snap_version = self._new_unique_file(data_key, storage_key, storage_path, snap_version)
+ if version > 0:
+ self._log.info(f"Found {'output' if is_output else 'input'} [{data_key}] version {version}")
+ self._log.info(f"Generating {'prior' if is_output else 'data'} definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
+ elif is_output:
+ self._log.info(f"No prior data for output [{data_key}]")
+ else:
+ # This is allowed for some scenarios, e.g. inside a job group
+ self._log.warning(f"No data found for input [{data_key}]")

  part_key = _meta.PartKey(opaqueKey="part-root", partType=_meta.PartType.PART_ROOT)
- delta_index = 1
- incarnation_index = 1
+ snap_index = version - 1 if version > 0 else 0
+ delta_index = 0
+ incarnation_index = 0

  # This is also defined in functions.DynamicDataSpecFunc, maybe centralize?
- data_item = f"data/table/{data_id.objectId}/{part_key.opaqueKey}/snap-{snap_version}/delta-{delta_index}"
+ data_item = f"data/table/{data_id.objectId}/{part_key.opaqueKey}/snap-{snap_index}/delta-{delta_index}"

  data_obj = self._generate_data_definition(
- part_key, snap_version, delta_index, data_item,
+ part_key, snap_index, delta_index, data_item,
  schema, storage_id)

  storage_obj = self._generate_storage_definition(
  storage_id, storage_key, storage_path, storage_format,
  data_item, incarnation_index)

- resources[_util.object_key(data_id)] = data_obj
- resources[_util.object_key(storage_id)] = storage_obj
+ job_metadata[_util.object_key(data_id)] = data_obj
+ job_metadata[_util.object_key(storage_id)] = storage_obj

- return data_id
+ return _util.selector_for(data_id)

  def _process_file_socket(
  self, file_key, file_value, file_type: _meta.FileType,
- resources: tp.Dict[str, _meta.ObjectDefinition], new_unique_file=False) \
- -> _meta.TagHeader:
+ job_metadata: tp.Dict[str, _meta.ObjectDefinition], is_output: bool) \
+ -> tp.Optional[_meta.TagSelector]:

  file_id = _util.new_object_id(_meta.ObjectType.FILE)
  storage_id = _util.new_object_id(_meta.ObjectType.STORAGE)

- self._log.info(f"Generating file definition for [{file_key}] with ID = [{_util.object_key(file_id)}]")
-
  if isinstance(file_value, str):

  storage_key = self._sys_config.storage.defaultBucket
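
In the hunk above, the snap, delta and incarnation indices become 0-based and the snap index is derived from the version discovered on disk, so the data item path lines up with the developer storage layout. A small sketch of that path construction; the helper name is illustrative, not part of the runtime API:

```python
import uuid

def build_data_item(object_id: str, part_key: str, version: int) -> str:
    # Mirrors the f-string in the diff: 0-based snap index derived from the found version
    snap_index = version - 1 if version > 0 else 0
    delta_index = 0
    return f"data/table/{object_id}/{part_key}/snap-{snap_index}/delta-{delta_index}"

# A dataset found at version 2 maps to snap-1 / delta-0
print(build_data_item(str(uuid.uuid4()), "part-root", version=2))
```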
@@ -917,17 +924,28 @@ class DevModeTranslator:
  else:
  raise _ex.EConfigParse(f"Invalid configuration for input '{file_key}'")

- storage_format = "application/x-binary"
- file_version = 1
+ # Scan for existing versions using hte DEVELOPER storage layout

- if new_unique_file:
- storage_path, file_version = self._new_unique_file(file_key, storage_key, storage_path, file_version)
- file_size = 0
+ self._log.info(f"Looking for {'output' if is_output else 'input'} [{file_key}]...")
+
+ storage_path, version = self._find_latest_version(storage_key, storage_path)
+ file_id.objectVersion = version
+
+ if version > 0:
+ self._log.info(f"Found {'output' if is_output else 'input'} [{file_key}] version {version}")
+ self._log.info(f"Generating {'prior' if is_output else 'file'} definition for [{file_key}] with ID = [{_util.object_key(file_id)}]")
+ elif is_output:
+ self._log.info(f"No prior data for output [{file_key}]")
  else:
- storage = self._storage_manager.get_file_storage(storage_key)
- file_size = storage.size(storage_path)
+ # This is allowed for some scenarios, e.g. inside a job group
+ self._log.warning(f"No data found for input [{file_key}]")
+
+ storage = self._storage_manager.get_file_storage(storage_key)
+ file_size = storage.size(storage_path) if storage.exists(storage_path) else 0

- data_item = f"file/{file_id.objectId}/version-{file_version}"
+ storage_format = "application/x-binary"
+
+ data_item = f"file/{file_id.objectId}/version-{version}"
  file_name = f"{file_key}.{file_type.extension}"

  file_obj = self._generate_file_definition(
@@ -936,12 +954,12 @@ class DevModeTranslator:

  storage_obj = self._generate_storage_definition(
  storage_id, storage_key, storage_path, storage_format,
- data_item, incarnation_index=1)
+ data_item, incarnation_index=0)

- resources[_util.object_key(file_id)] = file_obj
- resources[_util.object_key(storage_id)] = storage_obj
+ job_metadata[_util.object_key(file_id)] = file_obj
+ job_metadata[_util.object_key(storage_id)] = storage_obj

- return file_id
+ return _util.selector_for(file_id)

  @staticmethod
  def infer_format(storage_path: str, storage_config: _cfg.StorageConfig, schema: tp.Optional[_meta.SchemaDefinition]):
@@ -960,25 +978,28 @@ class DevModeTranslator:
  else:
  return storage_config.defaultFormat

- def _new_unique_file(self, socket_name, storage_key, storage_path, version):
+ def _find_latest_version(self, storage_key, storage_path):

- x_storage = self._storage_manager.get_file_storage(storage_key)
- x_orig_path = pathlib.PurePath(storage_path)
- x_name = x_orig_path.name
+ storage = self._storage_manager.get_file_storage(storage_key)
+ orig_path = pathlib.PurePath(storage_path)
+ version = 0

- if x_storage.exists(str(x_orig_path.parent)):
- listing = x_storage.ls(str(x_orig_path.parent))
- existing_files = list(map(lambda stat: stat.file_name, listing))
- else:
- existing_files = []
+ if not storage.exists(str(orig_path.parent)):
+ return storage_path, version
+
+ listing = storage.ls(str(orig_path.parent))
+ existing_files = list(map(lambda stat: stat.file_name, listing))
+
+ next_version = version + 1
+ next_name = f"{orig_path.stem}{orig_path.suffix}"

- while x_name in existing_files:
+ while next_name in existing_files:

- version += 1
- x_name = f"{x_orig_path.stem}-{version}{x_orig_path.suffix}"
- storage_path = str(x_orig_path.parent.joinpath(x_name))
+ storage_path = str(orig_path.parent.joinpath(next_name))
+ version = next_version

- self._log.info(f"Output for [{socket_name}] will be version {version}")
+ next_version = version + 1
+ next_name = f"{orig_path.stem}-{next_version}{orig_path.suffix}"

  return storage_path, version

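
`_find_latest_version` above replaces `_new_unique_file`: instead of inventing a fresh name for each new output, it scans the target directory for the developer-layout naming sequence (`name.ext`, `name-2.ext`, `name-3.ext`, and so on) and reports the highest version already present, or 0 if nothing exists. A standalone sketch of the same scan over an in-memory listing:

```python
import pathlib

def find_latest_version(existing_files: list, storage_path: str):
    """Walk name.ext, name-2.ext, name-3.ext, ... until the next name is free."""
    orig_path = pathlib.PurePath(storage_path)
    version = 0
    latest_path = storage_path
    next_version = version + 1
    next_name = f"{orig_path.stem}{orig_path.suffix}"
    while next_name in existing_files:
        latest_path = str(orig_path.parent.joinpath(next_name))
        version = next_version
        next_version = version + 1
        next_name = f"{orig_path.stem}-{next_version}{orig_path.suffix}"
    return latest_path, version

# Three versions on disk: the latest is outputs/report-3.csv, reported as version 3
listing = ["report.csv", "report-2.csv", "report-3.csv"]
print(find_latest_version(listing, "outputs/report.csv"))
```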
@@ -1043,6 +1064,7 @@ class DevModeTranslator:

  storage_def = _meta.StorageDefinition()
  storage_def.dataItems[data_item] = storage_item
+ storage_def.layout = _meta.StorageLayout.DEVELOPER_LAYOUT

  if storage_format.lower() == "csv":
  storage_def.storageOptions["lenient_csv_parser"] = _types.MetadataCodec.encode_value(True)