tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in the public registry.
Files changed (64)
  1. tracdap/rt/_impl/core/config_parser.py +29 -3
  2. tracdap/rt/_impl/core/data.py +627 -40
  3. tracdap/rt/_impl/core/repos.py +17 -8
  4. tracdap/rt/_impl/core/storage.py +25 -13
  5. tracdap/rt/_impl/core/struct.py +254 -60
  6. tracdap/rt/_impl/core/util.py +125 -11
  7. tracdap/rt/_impl/exec/context.py +35 -8
  8. tracdap/rt/_impl/exec/dev_mode.py +169 -127
  9. tracdap/rt/_impl/exec/engine.py +203 -140
  10. tracdap/rt/_impl/exec/functions.py +228 -263
  11. tracdap/rt/_impl/exec/graph.py +141 -126
  12. tracdap/rt/_impl/exec/graph_builder.py +428 -449
  13. tracdap/rt/_impl/grpc/codec.py +8 -13
  14. tracdap/rt/_impl/grpc/server.py +7 -7
  15. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
  16. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
  17. tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
  18. tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
  19. tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
  20. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +37 -35
  21. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
  22. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
  23. tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
  24. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
  25. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
  26. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
  27. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
  28. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
  29. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
  30. tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
  31. tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
  32. tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
  33. tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
  34. tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
  35. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +23 -19
  36. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
  37. tracdap/rt/_impl/runtime.py +3 -9
  38. tracdap/rt/_impl/static_api.py +5 -6
  39. tracdap/rt/_plugins/format_csv.py +2 -2
  40. tracdap/rt/_plugins/repo_git.py +56 -11
  41. tracdap/rt/_plugins/storage_aws.py +165 -150
  42. tracdap/rt/_plugins/storage_azure.py +17 -11
  43. tracdap/rt/_plugins/storage_gcp.py +35 -18
  44. tracdap/rt/_version.py +1 -1
  45. tracdap/rt/api/model_api.py +45 -0
  46. tracdap/rt/config/__init__.py +7 -9
  47. tracdap/rt/config/common.py +3 -14
  48. tracdap/rt/config/job.py +17 -3
  49. tracdap/rt/config/platform.py +9 -32
  50. tracdap/rt/config/result.py +8 -4
  51. tracdap/rt/config/runtime.py +5 -10
  52. tracdap/rt/config/tenant.py +28 -0
  53. tracdap/rt/launch/cli.py +0 -8
  54. tracdap/rt/launch/launch.py +1 -3
  55. tracdap/rt/metadata/__init__.py +35 -35
  56. tracdap/rt/metadata/data.py +19 -31
  57. tracdap/rt/metadata/job.py +3 -1
  58. tracdap/rt/metadata/storage.py +9 -0
  59. tracdap/rt/metadata/type.py +9 -5
  60. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +5 -3
  61. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +64 -63
  62. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
  63. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
  64. {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
The expanded diff below covers tracdap/rt/_impl/exec/dev_mode.py (file 8 in the list above).

@@ -58,11 +58,6 @@ class DevModeTranslator:
 
         cls._log.info(f"Applying dev mode config translation to system config")
 
-        # TODO: In code gen, default object types to a new object unless the field is marked as optional
-        # This would match the general semantics of protobuf
-        if sys_config.storage is None:
-            sys_config.storage = _cfg.StorageConfig()
-
         sys_config = cls._add_integrated_repo(sys_config)
         sys_config = cls._process_storage(sys_config, config_mgr)
 
@@ -73,11 +68,12 @@ class DevModeTranslator:
 
         # Add the integrated model repo trac_integrated
 
-        integrated_repo_config = _cfg.PluginConfig(
+        integrated_repo_config = _meta.ResourceDefinition(
+            resourceType=_meta.ResourceType.MODEL_REPOSITORY,
             protocol="integrated",
             properties={})
 
-        sys_config.repositories["trac_integrated"] = integrated_repo_config
+        sys_config.resources["trac_integrated"] = integrated_repo_config
 
         return sys_config
 
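Note: model repositories move out of the dedicated sys_config.repositories map (PluginConfig entries) and into the general sys_config.resources map as typed ResourceDefinition entries. A migration sketch for a user-defined repository follows; it uses only names visible in this hunk, except for the "git" protocol and "repoUrl" property, which are illustrative assumptions:

    # 0.8.x style (no longer valid):
    #   sys_config.repositories["my_models"] = _cfg.PluginConfig(
    #       protocol="git", properties={"repoUrl": "https://example.com/models.git"})

    # 0.9.x style, matching the hunk above:
    sys_config.resources["my_models"] = _meta.ResourceDefinition(
        resourceType=_meta.ResourceType.MODEL_REPOSITORY,
        protocol="git",
        properties={"repoUrl": "https://example.com/models.git"})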
@@ -86,23 +82,17 @@ class DevModeTranslator:
             cls, sys_config: _cfg.RuntimeConfig,
             config_mgr: _cfg_p.ConfigManager):
 
-        storage_config = copy.deepcopy(sys_config.storage)
-
-        for bucket_key, bucket_config in storage_config.buckets.items():
-            storage_config.buckets[bucket_key] = cls._resolve_storage_location(
-                bucket_key, bucket_config, config_mgr)
-
-        for bucket_key, bucket_config in storage_config.external.items():
-            storage_config.external[bucket_key] = cls._resolve_storage_location(
-                bucket_key, bucket_config, config_mgr)
+        sys_config.properties[_cfg_p.ConfigKeys.STORAGE_DEFAULT_LAYOUT] = _meta.StorageLayout.DEVELOPER_LAYOUT.name
 
-        sys_config = copy.copy(sys_config)
-        sys_config.storage = storage_config
+        for resource_key, resource in sys_config.resources.items():
+            if resource.resourceType in [_meta.ResourceType.INTERNAL_STORAGE, _meta.ResourceType.EXTERNAL_STORAGE]:
+                sys_config.resources[resource_key] = cls._resolve_storage_location(
+                    resource_key, resource, config_mgr)
 
         return sys_config
 
     @classmethod
-    def _resolve_storage_location(cls, bucket_key, bucket_config, config_mgr: _cfg_p.ConfigManager):
+    def _resolve_storage_location(cls, bucket_key, bucket_config: _meta.ResourceDefinition, config_mgr: _cfg_p.ConfigManager):
 
         if bucket_config.protocol != "LOCAL":
             return bucket_config
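Note: the same resource model now covers storage. Instead of deep-copying a StorageConfig and rewriting its buckets and external maps, dev mode sets a default-layout property and resolves LOCAL storage resources in place. A sketch of a storage resource this loop would pick up; the "data_store" key and "rootPath" property are illustrative assumptions, not taken from this diff:

    # Hypothetical internal storage resource, resolved by the loop above
    sys_config.resources["data_store"] = _meta.ResourceDefinition(
        resourceType=_meta.ResourceType.INTERNAL_STORAGE,
        protocol="LOCAL",
        properties={"rootPath": "./data"})

    # Dev mode pins the default storage layout via a property
    sys_config.properties[_cfg_p.ConfigKeys.STORAGE_DEFAULT_LAYOUT] = \
        _meta.StorageLayout.DEVELOPER_LAYOUT.name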
@@ -166,6 +156,9 @@ class DevModeTranslator:
             job_config, job_def = self.translate_job_def(job_config, job_def, model_class)
             job_config.job = job_def
 
+            # Include some basic tags and attributes in the generated metadata
+            job_config = self.generate_dev_mode_tags(job_config)
+
             return job_config
 
         finally:
@@ -218,27 +211,31 @@ class DevModeTranslator:
         return job_config, job_def
 
     @classmethod
-    def _add_job_resource(
+    def _add_job_metadata(
             cls, job_config: _cfg.JobConfig,
             obj_id: _meta.TagHeader, obj: _meta.ObjectDefinition) \
             -> _cfg.JobConfig:
 
         obj_key = _util.object_key(obj_id)
-        job_config.resources[obj_key] = obj
+
+        job_config.objectMapping[obj_key] = obj_id
+        job_config.objects[obj_key] = obj
 
         return job_config
 
     @classmethod
-    def _process_job_id(cls, job_config: _cfg.JobConfig):
+    def _process_job_id(cls, job_config: _cfg.JobConfig) -> _cfg.JobConfig:
 
         job_id = _util.new_object_id(_meta.ObjectType.JOB)
+        result_id = _util.new_object_id(_meta.ObjectType.RESULT)
 
         cls._log.info(f"Assigning job ID = [{_util.object_key(job_id)}]")
+        cls._log.info(f"Assigning result ID = [{_util.object_key(result_id)}]")
 
-        translated_config = copy.copy(job_config)
-        translated_config.jobId = job_id
+        job_config.jobId = job_id
+        job_config.resultId = result_id
 
-        return translated_config
+        return job_config
 
     @classmethod
     def _process_job_type(cls, job_def: _meta.JobDefinition):
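Note: the renamed _add_job_metadata splits the old single resources map in two: objects holds full ObjectDefinition entries and objectMapping holds the matching TagHeader IDs, while _process_job_id now mints a RESULT ID alongside the JOB ID and mutates the config in place instead of copying it. A sketch of how a selector lookup against the new structure might work; the helper below is hypothetical, the real resolution lives in _util.get_job_metadata, which this diff does not show:

    def get_job_metadata_sketch(selector, job_config):
        # Resolve a selector to its full definition via the object key
        obj_key = _util.object_key(selector)
        return job_config.objects.get(obj_key)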
@@ -346,7 +343,7 @@ class DevModeTranslator:
 
             model_id, model_obj = self._generate_model_for_class(model_class)
             job_detail.model = _util.selector_for(model_id)
-            job_config = self._add_job_resource(job_config, model_id, model_obj)
+            job_config = self._add_job_metadata(job_config, model_id, model_obj)
 
         # Otherwise look for models specified as a single string, and take that as the entry point
         else:
@@ -355,7 +352,7 @@ class DevModeTranslator:
             if hasattr(job_detail, "model") and isinstance(job_detail.model, str):
                 model_id, model_obj = self._generate_model_for_entry_point(job_detail.model)  # noqa
                 job_detail.model = _util.selector_for(model_id)
-                job_config = self._add_job_resource(job_config, model_id, model_obj)
+                job_config = self._add_job_metadata(job_config, model_id, model_obj)
 
             elif hasattr(job_detail, "model") and isinstance(job_detail.model, _meta.TagSelector):
                 if job_detail.model.objectType == _meta.ObjectType.OBJECT_TYPE_NOT_SET:
@@ -369,7 +366,7 @@ class DevModeTranslator:
             if isinstance(model_detail, str):
                 model_id, model_obj = self._generate_model_for_entry_point(model_detail)
                 job_detail.models[model_key] = _util.selector_for(model_id)
-                job_config = self._add_job_resource(job_config, model_id, model_obj)
+                job_config = self._add_job_metadata(job_config, model_id, model_obj)
 
         return job_config, job_def
 
@@ -446,8 +443,8 @@ class DevModeTranslator:
         job_def.runFlow.flow = _util.selector_for(flow_id)
 
         job_config = copy.copy(job_config)
-        job_config.resources = copy.copy(job_config.resources)
-        job_config = self._add_job_resource(job_config, flow_id, flow_obj)
+        job_config.objects = copy.copy(job_config.objects)
+        job_config = self._add_job_metadata(job_config, flow_id, flow_obj)
 
         return job_config, job_def
 
@@ -472,7 +469,7 @@ class DevModeTranslator:
         for model_name, model_node in model_nodes.items():
 
             model_selector = job_def.runFlow.models[model_name]
-            model_obj = _util.get_job_resource(model_selector, job_config)
+            model_obj = _util.get_job_metadata(model_selector, job_config)
 
             model_inputs = set(model_obj.model.inputs.keys())
             model_outputs = set(model_obj.model.outputs.keys())
@@ -540,7 +537,7 @@ class DevModeTranslator:
             # Generate node param sockets needed by the model
             if node_name in job.models:
                 model_selector = job.models[node_name]
-                model_obj = _util.get_job_resource(model_selector, job_config)
+                model_obj = _util.get_job_metadata(model_selector, job_config)
                 for param_name in model_obj.model.parameters:
                     add_param_to_flow(node_name, param_name)
                     if param_name not in node.parameters:
@@ -622,7 +619,7 @@ class DevModeTranslator:
         for target in targets:
 
             model_selector = job_def.runFlow.models.get(target.node)
-            model_obj = _util.get_job_resource(model_selector, job_config)
+            model_obj = _util.get_job_metadata(model_selector, job_config)
             model_param = model_obj.model.parameters.get(target.socket)
             model_params.append(model_param)
 
@@ -659,7 +656,7 @@ class DevModeTranslator:
         for target in targets:
 
             model_selector = job_def.runFlow.models.get(target.node)
-            model_obj = _util.get_job_resource(model_selector, job_config)
+            model_obj = _util.get_job_metadata(model_selector, job_config)
             model_input = model_obj.model.inputs.get(target.socket)
             model_inputs.append(model_input)
 
@@ -694,7 +691,7 @@ class DevModeTranslator:
         for source in sources:
 
             model_selector = job_def.runFlow.models.get(source.node)
-            model_obj = _util.get_job_resource(model_selector, job_config)
+            model_obj = _util.get_job_metadata(model_selector, job_config)
             model_input = model_obj.model.outputs.get(source.socket)
             model_outputs.append(model_input)
 
@@ -727,10 +724,10 @@ class DevModeTranslator:
 
         if hasattr(job_detail, "model"):
             model_key = _util.object_key(job_detail.model)
-            model_or_flow = job_config.resources[model_key].model
+            model_or_flow = job_config.objects[model_key].model
         elif hasattr(job_detail, "flow"):
             flow_key = _util.object_key(job_detail.flow)
-            model_or_flow = job_config.resources[flow_key].flow
+            model_or_flow = job_config.objects[flow_key].flow
         else:
             model_or_flow = None
 
@@ -784,71 +781,67 @@ class DevModeTranslator:
         job_detail = self._get_job_detail(job_def)
 
         if hasattr(job_detail, "model"):
-            model_obj = _util.get_job_resource(job_detail.model, job_config)
+            model_obj = _util.get_job_metadata(job_detail.model, job_config)
             required_inputs = model_obj.model.inputs
-            required_outputs = model_obj.model.outputs
+            expected_outputs = model_obj.model.outputs
 
         elif hasattr(job_detail, "flow"):
-            flow_obj = _util.get_job_resource(job_detail.flow, job_config)
+            flow_obj = _util.get_job_metadata(job_detail.flow, job_config)
             required_inputs = flow_obj.flow.inputs
-            required_outputs = flow_obj.flow.outputs
+            expected_outputs = flow_obj.flow.outputs
 
         else:
             return job_config, job_def
 
         job_inputs = job_detail.inputs
         job_outputs = job_detail.outputs
-        job_resources = job_config.resources
-
-        for input_key, input_value in job_inputs.items():
-            if not (isinstance(input_value, str) and input_value in job_resources):
-
-                model_input = required_inputs[input_key]
-
-                if model_input.objectType == _meta.ObjectType.DATA:
-                    schema = model_input.schema if model_input and not model_input.dynamic else None
-                    input_id = self._process_data_socket(input_key, input_value, schema, job_resources, new_unique_file=False)
-                elif model_input.objectType == _meta.ObjectType.FILE:
-                    file_type = model_input.fileType
-                    input_id = self._process_file_socket(input_key, input_value, file_type, job_resources, new_unique_file=False)
-                else:
-                    raise _ex.EUnexpected()
-
-                job_inputs[input_key] = _util.selector_for(input_id)
+        job_prior_outputs = job_detail.priorOutputs
+
+        for key, schema in required_inputs.items():
+            if key not in job_inputs:
+                if not schema.optional:
+                    raise _ex.EJobValidation(f"Missing required input [{key}]")
+                continue
+            supplied_input = job_inputs.pop(key) if key in job_inputs else None
+            input_selector = self._process_socket(key, schema, supplied_input, job_config, is_output=False)
+            if input_selector is not None:
+                job_inputs[key] = input_selector
+
+        for key, schema in expected_outputs.items():
+            if key not in job_outputs:
+                raise _ex.EJobValidation(f"Missing required output [{key}]")
+            supplied_output = job_outputs.pop(key)
+            output_selector = self._process_socket(key, schema, supplied_output, job_config, is_output=True)
+            if output_selector is not None:
+                job_prior_outputs[key] = output_selector
 
-        for output_key, output_value in job_outputs.items():
-            if not (isinstance(output_value, str) and output_value in job_resources):
+        return job_config, job_def
 
-                model_output = required_outputs[output_key]
+    def _process_socket(self, key, socket, supplied_value, job_config, is_output) -> _meta.TagSelector:
 
-                if model_output.objectType == _meta.ObjectType.DATA:
-                    schema = model_output.schema if model_output and not model_output.dynamic else None
-                    output_id = self._process_data_socket(output_key, output_value, schema, job_resources, new_unique_file=True)
-                elif model_output.objectType == _meta.ObjectType.FILE:
-                    file_type = model_output.fileType
-                    output_id = self._process_file_socket(output_key, output_value, file_type, job_resources, new_unique_file=True)
-                else:
-                    raise _ex.EUnexpected()
+        if socket.objectType == _meta.ObjectType.DATA:
+            schema = socket.schema if socket and not socket.dynamic else None
+            return self._process_data_socket(key, supplied_value, schema, job_config, is_output)
 
-                job_outputs[output_key] = _util.selector_for(output_id)
+        elif socket.objectType == _meta.ObjectType.FILE:
+            file_type = socket.fileType
+            return self._process_file_socket(key, supplied_value, file_type, job_config, is_output)
 
-        return job_config, job_def
+        else:
+            raise _ex.EUnexpected()
 
     def _process_data_socket(
             self, data_key, data_value, schema: tp.Optional[_meta.SchemaDefinition],
-            resources: tp.Dict[str, _meta.ObjectDefinition], new_unique_file=False) \
-            -> _meta.TagHeader:
+            job_config: _cfg.JobConfig, is_output: bool)\
+            -> _meta.TagSelector:
 
         data_id = _util.new_object_id(_meta.ObjectType.DATA)
         storage_id = _util.new_object_id(_meta.ObjectType.STORAGE)
 
-        self._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
-
         if isinstance(data_value, str):
             storage_path = data_value
-            storage_key = self._sys_config.storage.defaultBucket
-            storage_format = self.infer_format(storage_path, self._sys_config.storage, schema)
-            snap_version = 1
+            storage_key = _util.read_property(self._sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)
+            storage_format = self.infer_format(storage_path, self._sys_config, schema)
 
         elif isinstance(data_value, dict):
 
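Note: this hunk is the behavioural core of the change. Dev mode no longer iterates whatever inputs and outputs the job config happens to supply; it iterates the inputs the model or flow declares, so a missing required input fails fast, an omitted optional input is skipped, and outputs found on disk are recorded as priorOutputs rather than forced onto a new unique file. A toy sketch of the input rules, using stand-in types (nothing below is a TRAC class):

    from dataclasses import dataclass

    @dataclass
    class SocketSketch:
        optional: bool = False

    class EJobValidationSketch(Exception):
        pass

    def check_inputs_sketch(required_inputs, job_inputs):
        # Mirrors the first loop above: required-and-missing raises,
        # optional-and-missing is allowed and simply skipped
        for key, socket in required_inputs.items():
            if key not in job_inputs:
                if not socket.optional:
                    raise EJobValidationSketch(f"Missing required input [{key}]")
                continue

    # ref_data is optional, so this passes without raising
    check_inputs_sketch(
        {"customers": SocketSketch(), "ref_data": SocketSketch(optional=True)},
        {"customers": "inputs/customers.csv"})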
@@ -857,58 +850,65 @@ class DevModeTranslator:
             if not storage_path:
                 raise _ex.EConfigParse(f"Invalid configuration for input [{data_key}] (missing required value 'path'")
 
-            storage_key = data_value.get("storageKey") or self._sys_config.storage.defaultBucket
-            storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config.storage, schema)
-            snap_version = 1
+            storage_key = data_value.get("storageKey") or _util.read_property(self._sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)
+            storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config, schema)
 
         else:
             raise _ex.EConfigParse(f"Invalid configuration for input '{data_key}'")
 
-        # For unique outputs, increment the snap number to find a new unique snap
-        # These are not incarnations, bc likely in dev mode model code and inputs are changing
-        # Incarnations are for recreation of a dataset using the exact same code path and inputs
+        # Scan for existing versions using hte DEVELOPER storage layout
+
+        self._log.info(f"Looking for {'output' if is_output else 'input'} [{data_key}]...")
 
-        if new_unique_file:
-            storage_path, snap_version = self._new_unique_file(data_key, storage_key, storage_path, snap_version)
+        storage_path, version = self._find_latest_version(storage_key, storage_path)
+        data_id.objectVersion = version
+
+        if version > 0:
+            self._log.info(f"Found {'output' if is_output else 'input'} [{data_key}] version {version}")
+            self._log.info(f"Generating {'prior' if is_output else 'data'} definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
+        elif is_output:
+            self._log.info(f"No prior data for output [{data_key}]")
+        else:
+            # This is allowed for some scenarios, e.g. inside a job group
+            self._log.warning(f"No data found for input [{data_key}]")
 
         part_key = _meta.PartKey(opaqueKey="part-root", partType=_meta.PartType.PART_ROOT)
-        delta_index = 1
-        incarnation_index = 1
+        snap_index = version - 1 if version > 0 else 0
+        delta_index = 0
+        incarnation_index = 0
 
         # This is also defined in functions.DynamicDataSpecFunc, maybe centralize?
-        data_item = f"data/table/{data_id.objectId}/{part_key.opaqueKey}/snap-{snap_version}/delta-{delta_index}"
+        data_item = f"data/table/{data_id.objectId}/{part_key.opaqueKey}/snap-{snap_index}/delta-{delta_index}"
 
         data_obj = self._generate_data_definition(
-            part_key, snap_version, delta_index, data_item,
+            part_key, snap_index, delta_index, data_item,
             schema, storage_id)
 
         storage_obj = self._generate_storage_definition(
            storage_id, storage_key, storage_path, storage_format,
            data_item, incarnation_index)
 
-        resources[_util.object_key(data_id)] = data_obj
-        resources[_util.object_key(storage_id)] = storage_obj
+        self._add_job_metadata(job_config, data_id, data_obj)
+        self._add_job_metadata(job_config, storage_id, storage_obj)
 
-        return data_id
+        return _util.selector_for(data_id)
 
     def _process_file_socket(
            self, file_key, file_value, file_type: _meta.FileType,
-            resources: tp.Dict[str, _meta.ObjectDefinition], new_unique_file=False) \
-            -> _meta.TagHeader:
+            job_config: _cfg.JobConfig, is_output: bool) \
+            -> tp.Optional[_meta.TagSelector]:
 
        file_id = _util.new_object_id(_meta.ObjectType.FILE)
        storage_id = _util.new_object_id(_meta.ObjectType.STORAGE)
 
-        self._log.info(f"Generating file definition for [{file_key}] with ID = [{_util.object_key(file_id)}]")
-
        if isinstance(file_value, str):
 
-            storage_key = self._sys_config.storage.defaultBucket
+            storage_key = _util.read_property(self._sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)
            storage_path = file_value
 
        elif isinstance(file_value, dict):
 
-            storage_key = file_value.get("storageKey") or self._sys_config.storage.defaultBucket
+            storage_key = file_value.get("storageKey") or _util.read_property(self._sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_LOCATION)
            storage_path = file_value.get("path")
 
        if not storage_path:
@@ -917,17 +917,28 @@ class DevModeTranslator:
        else:
            raise _ex.EConfigParse(f"Invalid configuration for input '{file_key}'")
 
-        storage_format = "application/x-binary"
-        file_version = 1
+        # Scan for existing versions using hte DEVELOPER storage layout
+
+        self._log.info(f"Looking for {'output' if is_output else 'input'} [{file_key}]...")
 
-        if new_unique_file:
-            storage_path, file_version = self._new_unique_file(file_key, storage_key, storage_path, file_version)
-            file_size = 0
+        storage_path, version = self._find_latest_version(storage_key, storage_path)
+        file_id.objectVersion = version
+
+        if version > 0:
+            self._log.info(f"Found {'output' if is_output else 'input'} [{file_key}] version {version}")
+            self._log.info(f"Generating {'prior' if is_output else 'file'} definition for [{file_key}] with ID = [{_util.object_key(file_id)}]")
+        elif is_output:
+            self._log.info(f"No prior data for output [{file_key}]")
        else:
-            storage = self._storage_manager.get_file_storage(storage_key)
-            file_size = storage.size(storage_path)
+            # This is allowed for some scenarios, e.g. inside a job group
+            self._log.warning(f"No data found for input [{file_key}]")
+
+        storage = self._storage_manager.get_file_storage(storage_key)
+        file_size = storage.size(storage_path) if storage.exists(storage_path) else 0
 
-        data_item = f"file/{file_id.objectId}/version-{file_version}"
+        storage_format = "application/x-binary"
+
+        data_item = f"file/{file_id.objectId}/version-{version}"
        file_name = f"{file_key}.{file_type.extension}"
 
        file_obj = self._generate_file_definition(
@@ -936,15 +947,15 @@ class DevModeTranslator:
 
         storage_obj = self._generate_storage_definition(
             storage_id, storage_key, storage_path, storage_format,
-            data_item, incarnation_index=1)
+            data_item, incarnation_index=0)
 
-        resources[_util.object_key(file_id)] = file_obj
-        resources[_util.object_key(storage_id)] = storage_obj
+        self._add_job_metadata(job_config, file_id, file_obj)
+        self._add_job_metadata(job_config, storage_id, storage_obj)
 
-        return file_id
+        return _util.selector_for(file_id)
 
     @staticmethod
-    def infer_format(storage_path: str, storage_config: _cfg.StorageConfig, schema: tp.Optional[_meta.SchemaDefinition]):
+    def infer_format(storage_path: str, sys_config: _cfg.RuntimeConfig, schema: tp.Optional[_meta.SchemaDefinition]):
 
         schema_type = schema.schemaType if schema and schema.schemaType else _meta.SchemaType.TABLE
 
@@ -958,27 +969,30 @@ class DevModeTranslator:
             return extension[1:] if extension.startswith(".") else extension
 
         else:
-            return storage_config.defaultFormat
+            return _util.read_property(sys_config.properties, _cfg_p.ConfigKeys.STORAGE_DEFAULT_FORMAT, "CSV")
 
-    def _new_unique_file(self, socket_name, storage_key, storage_path, version):
+    def _find_latest_version(self, storage_key, storage_path):
 
-        x_storage = self._storage_manager.get_file_storage(storage_key)
-        x_orig_path = pathlib.PurePath(storage_path)
-        x_name = x_orig_path.name
+        storage = self._storage_manager.get_file_storage(storage_key)
+        orig_path = pathlib.PurePath(storage_path)
+        version = 0
 
-        if x_storage.exists(str(x_orig_path.parent)):
-            listing = x_storage.ls(str(x_orig_path.parent))
-            existing_files = list(map(lambda stat: stat.file_name, listing))
-        else:
-            existing_files = []
+        if not storage.exists(str(orig_path.parent)):
+            return storage_path, version
 
-        while x_name in existing_files:
+        listing = storage.ls(str(orig_path.parent))
+        existing_files = list(map(lambda stat: stat.file_name, listing))
 
-            version += 1
-            x_name = f"{x_orig_path.stem}-{version}{x_orig_path.suffix}"
-            storage_path = str(x_orig_path.parent.joinpath(x_name))
+        next_version = version + 1
+        next_name = f"{orig_path.stem}{orig_path.suffix}"
 
-            self._log.info(f"Output for [{socket_name}] will be version {version}")
+        while next_name in existing_files:
+
+            storage_path = str(orig_path.parent.joinpath(next_name))
+            version = next_version
+
+            next_version = version + 1
+            next_name = f"{orig_path.stem}-{next_version}{orig_path.suffix}"
 
         return storage_path, version
 
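Note: the old _new_unique_file bumped a counter until it found an unused name (to write a fresh output); the new _find_latest_version walks the same name, name-2, name-3, ... convention but returns the latest version that already exists, or version 0 if none does. A standalone sketch of the scan against an in-memory listing:

    import pathlib

    def find_latest_version_sketch(existing_files, storage_path):
        # Same loop shape as _find_latest_version above, minus the storage layer
        orig_path = pathlib.PurePosixPath(storage_path)
        version = 0
        next_version = version + 1
        next_name = f"{orig_path.stem}{orig_path.suffix}"
        while next_name in existing_files:
            storage_path = str(orig_path.parent.joinpath(next_name))
            version = next_version
            next_version = version + 1
            next_name = f"{orig_path.stem}-{next_version}{orig_path.suffix}"
        return storage_path, version

    # "data.csv" and "data-2.csv" exist, "data-3.csv" does not:
    assert find_latest_version_sketch(
        ["data.csv", "data-2.csv"], "out/data.csv") == ("out/data-2.csv", 2)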
@@ -1043,12 +1057,40 @@ class DevModeTranslator:
 
         storage_def = _meta.StorageDefinition()
         storage_def.dataItems[data_item] = storage_item
+        storage_def.layout = _meta.StorageLayout.DEVELOPER_LAYOUT
 
         if storage_format.lower() == "csv":
             storage_def.storageOptions["lenient_csv_parser"] = _types.MetadataCodec.encode_value(True)
 
         return _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
 
+    @classmethod
+    def generate_dev_mode_tags(cls, job_config: _cfg.JobConfig) -> _cfg.JobConfig:
+
+        job_config = copy.copy(job_config)
+
+        for key, object_id in job_config.objectMapping.items():
+            if key not in job_config.tags:
+
+                tag = _meta.Tag(header=object_id)
+
+                timestamp = _types.MetadataCodec.convert_datetime_value(object_id.objectTimestamp.isoDatetime)
+                user_id = _types.MetadataCodec.encode_value("local_user")
+                user_name = _types.MetadataCodec.encode_value("Local User")
+
+                tag.attrs["trac_dev_mode"] = _types.MetadataCodec.encode_value(True)
+
+                tag.attrs["trac_create_time"] = timestamp
+                tag.attrs["trac_create_user_id"] = user_id
+                tag.attrs["trac_create_user_name"] = user_name
+
+                tag.attrs["trac_update_time"] = timestamp
+                tag.attrs["trac_update_user_id"] = user_id
+                tag.attrs["trac_update_user_name"] = user_name
+
+                job_config.tags[key] = tag
+
+        return job_config
 
 
 DevModeTranslator._log = _logging.logger_for_class(DevModeTranslator)
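Note: generate_dev_mode_tags backfills the tag metadata a real platform deployment would attach, so every object generated in dev mode is clearly marked as such. A usage sketch; the attr values shown are the decoded equivalents of the encoded values set above:

    job_config = DevModeTranslator.generate_dev_mode_tags(job_config)

    # Each generated object now carries a tag whose attrs decode to:
    #   trac_dev_mode         = True
    #   trac_create_time      = <object timestamp>   (also trac_update_time)
    #   trac_create_user_id   = "local_user"         (also trac_update_user_id)
    #   trac_create_user_name = "Local User"         (also trac_update_user_name)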