tracdap-runtime 0.6.5__py3-none-any.whl → 0.7.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. tracdap/rt/__init__.py +6 -5
  2. tracdap/rt/_exec/actors.py +6 -5
  3. tracdap/rt/_exec/context.py +278 -110
  4. tracdap/rt/_exec/dev_mode.py +237 -143
  5. tracdap/rt/_exec/engine.py +223 -64
  6. tracdap/rt/_exec/functions.py +31 -6
  7. tracdap/rt/_exec/graph.py +15 -5
  8. tracdap/rt/_exec/graph_builder.py +301 -203
  9. tracdap/rt/_exec/runtime.py +13 -10
  10. tracdap/rt/_exec/server.py +6 -5
  11. tracdap/rt/_impl/__init__.py +6 -5
  12. tracdap/rt/_impl/config_parser.py +17 -9
  13. tracdap/rt/_impl/data.py +284 -172
  14. tracdap/rt/_impl/ext/__init__.py +14 -0
  15. tracdap/rt/_impl/ext/sql.py +117 -0
  16. tracdap/rt/_impl/ext/storage.py +58 -0
  17. tracdap/rt/_impl/grpc/__init__.py +6 -5
  18. tracdap/rt/_impl/grpc/codec.py +6 -5
  19. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +62 -54
  20. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +37 -2
  21. tracdap/rt/_impl/guard_rails.py +6 -5
  22. tracdap/rt/_impl/models.py +6 -5
  23. tracdap/rt/_impl/repos.py +6 -5
  24. tracdap/rt/_impl/schemas.py +6 -5
  25. tracdap/rt/_impl/shim.py +6 -5
  26. tracdap/rt/_impl/static_api.py +30 -16
  27. tracdap/rt/_impl/storage.py +8 -7
  28. tracdap/rt/_impl/type_system.py +6 -5
  29. tracdap/rt/_impl/util.py +16 -5
  30. tracdap/rt/_impl/validation.py +72 -18
  31. tracdap/rt/_plugins/__init__.py +6 -5
  32. tracdap/rt/_plugins/_helpers.py +6 -5
  33. tracdap/rt/_plugins/config_local.py +6 -5
  34. tracdap/rt/_plugins/format_arrow.py +6 -5
  35. tracdap/rt/_plugins/format_csv.py +6 -5
  36. tracdap/rt/_plugins/format_parquet.py +6 -5
  37. tracdap/rt/_plugins/repo_git.py +6 -5
  38. tracdap/rt/_plugins/repo_local.py +6 -5
  39. tracdap/rt/_plugins/repo_pypi.py +6 -5
  40. tracdap/rt/_plugins/storage_aws.py +6 -5
  41. tracdap/rt/_plugins/storage_azure.py +6 -5
  42. tracdap/rt/_plugins/storage_gcp.py +6 -5
  43. tracdap/rt/_plugins/storage_local.py +6 -5
  44. tracdap/rt/_plugins/storage_sql.py +418 -0
  45. tracdap/rt/_plugins/storage_sql_dialects.py +118 -0
  46. tracdap/rt/_version.py +7 -6
  47. tracdap/rt/api/__init__.py +23 -5
  48. tracdap/rt/api/experimental.py +85 -37
  49. tracdap/rt/api/hook.py +16 -5
  50. tracdap/rt/api/model_api.py +110 -90
  51. tracdap/rt/api/static_api.py +142 -100
  52. tracdap/rt/config/common.py +26 -27
  53. tracdap/rt/config/job.py +5 -6
  54. tracdap/rt/config/platform.py +41 -42
  55. tracdap/rt/config/result.py +5 -6
  56. tracdap/rt/config/runtime.py +6 -7
  57. tracdap/rt/exceptions.py +13 -7
  58. tracdap/rt/ext/__init__.py +6 -5
  59. tracdap/rt/ext/config.py +6 -5
  60. tracdap/rt/ext/embed.py +6 -5
  61. tracdap/rt/ext/plugins.py +6 -5
  62. tracdap/rt/ext/repos.py +6 -5
  63. tracdap/rt/ext/storage.py +6 -5
  64. tracdap/rt/launch/__init__.py +10 -5
  65. tracdap/rt/launch/__main__.py +6 -5
  66. tracdap/rt/launch/cli.py +6 -5
  67. tracdap/rt/launch/launch.py +38 -15
  68. tracdap/rt/metadata/__init__.py +4 -0
  69. tracdap/rt/metadata/common.py +2 -3
  70. tracdap/rt/metadata/custom.py +3 -4
  71. tracdap/rt/metadata/data.py +30 -31
  72. tracdap/rt/metadata/file.py +6 -7
  73. tracdap/rt/metadata/flow.py +22 -23
  74. tracdap/rt/metadata/job.py +89 -45
  75. tracdap/rt/metadata/model.py +26 -27
  76. tracdap/rt/metadata/object.py +11 -12
  77. tracdap/rt/metadata/object_id.py +23 -24
  78. tracdap/rt/metadata/resource.py +0 -1
  79. tracdap/rt/metadata/search.py +15 -16
  80. tracdap/rt/metadata/stoarge.py +22 -23
  81. tracdap/rt/metadata/tag.py +8 -9
  82. tracdap/rt/metadata/tag_update.py +11 -12
  83. tracdap/rt/metadata/type.py +38 -38
  84. {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0rc1.dist-info}/LICENSE +1 -1
  85. {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0rc1.dist-info}/METADATA +4 -2
  86. tracdap_runtime-0.7.0rc1.dist-info/RECORD +121 -0
  87. {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0rc1.dist-info}/WHEEL +1 -1
  88. tracdap_runtime-0.6.5.dist-info/RECORD +0 -116
  89. {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,9 @@
1
- # Copyright 2022 Accenture Global Solutions Limited
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
1
+ # Licensed to the Fintech Open Source Foundation (FINOS) under one or
2
+ # more contributor license agreements. See the NOTICE file distributed
3
+ # with this work for additional information regarding copyright ownership.
4
+ # FINOS licenses this file to you under the Apache License, Version 2.0
5
+ # (the "License"); you may not use this file except in compliance with the
6
+ # License. You may obtain a copy of the License at
6
7
  #
7
8
  # http://www.apache.org/licenses/LICENSE-2.0
8
9
  #
@@ -34,7 +35,15 @@ DEV_MODE_JOB_CONFIG = [
34
35
  re.compile(r"job\.\w+\.outputs\.\w+"),
35
36
  re.compile(r"job\.\w+\.models\.\w+"),
36
37
  re.compile(r"job\.\w+\.model"),
37
- re.compile(r"job\.\w+\.flow")]
38
+ re.compile(r"job\.\w+\.flow"),
39
+
40
+ re.compile(r".*\.jobs\.\d+\.\w+\.parameters\.\w+"),
41
+ re.compile(r".*\.jobs\.\d+\.\w+\.inputs\.\w+"),
42
+ re.compile(r".*\.jobs\.\d+\.\w+\.outputs\.\w+"),
43
+ re.compile(r".*\.jobs\.\d+\.\w+\.models\.\w+"),
44
+ re.compile(r".*\.jobs\.\d+\.\w+\.model"),
45
+ re.compile(r".*\.jobs\.\d+\.\w+\.flow")
46
+ ]
38
47
 
39
48
  DEV_MODE_SYS_CONFIG = []
40
49
 
@@ -58,38 +67,6 @@ class DevModeTranslator:
58
67
 
59
68
  return sys_config
60
69
 
61
- @classmethod
62
- def translate_job_config(
63
- cls,
64
- sys_config: _cfg.RuntimeConfig,
65
- job_config: _cfg.JobConfig,
66
- scratch_dir: pathlib.Path,
67
- config_mgr: _cfg_p.ConfigManager,
68
- model_class: tp.Optional[_api.TracModel.__class__]) \
69
- -> _cfg.JobConfig:
70
-
71
- cls._log.info(f"Applying dev mode config translation to job config")
72
-
73
- # Protobuf semantics for a blank jobId should be an object, but objectId will be an empty string
74
- if not job_config.jobId or not job_config.jobId.objectId:
75
- job_config = cls._process_job_id(job_config)
76
-
77
- if job_config.job.jobType is None or job_config.job.jobType == _meta.JobType.JOB_TYPE_NOT_SET:
78
- job_config = cls._process_job_type(job_config)
79
-
80
- # Load and populate any models provided as a Python class or class name
81
- job_config = cls._process_models(sys_config, job_config, scratch_dir, model_class)
82
-
83
- # Fow flows, load external flow definitions then perform auto-wiring and type inference
84
- if job_config.job.jobType == _meta.JobType.RUN_FLOW:
85
- job_config = cls._process_flow_definition(job_config, config_mgr)
86
-
87
- # Apply processing to the parameters, inputs and outputs
88
- job_config = cls._process_parameters(job_config)
89
- job_config = cls._process_inputs_and_outputs(sys_config, job_config)
90
-
91
- return job_config
92
-
93
70
  @classmethod
94
71
  def _add_integrated_repo(cls, sys_config: _cfg.RuntimeConfig) -> _cfg.RuntimeConfig:
95
72
 
@@ -159,6 +136,86 @@ class DevModeTranslator:
159
136
  cls._log.error(msg)
160
137
  raise _ex.EConfigParse(msg)
161
138
 
139
+
140
+ def __init__(self, sys_config: _cfg.RuntimeConfig, config_mgr: _cfg_p.ConfigManager, scratch_dir: pathlib.Path):
141
+ self._sys_config = sys_config
142
+ self._config_mgr = config_mgr
143
+ self._scratch_dir = scratch_dir
144
+ self._model_loader: tp.Optional[_models.ModelLoader] = None
145
+
146
+ def translate_job_config(
147
+ self, job_config: _cfg.JobConfig,
148
+ model_class: tp.Optional[_api.TracModel.__class__] = None) \
149
+ -> _cfg.JobConfig:
150
+
151
+ try:
152
+ self._log.info(f"Applying dev mode config translation to job config")
153
+
154
+ self._model_loader = _models.ModelLoader(self._sys_config, self._scratch_dir)
155
+ self._model_loader.create_scope("DEV_MODE_TRANSLATION")
156
+
157
+ job_config = copy.deepcopy(job_config)
158
+ job_def = job_config.job
159
+
160
+ # Protobuf semantics for a blank jobId should be an object, but objectId will be an empty string
161
+ if not job_config.jobId or not job_config.jobId.objectId:
162
+ job_config = self._process_job_id(job_config)
163
+
164
+ job_config, job_def = self.translate_job_def(job_config, job_def, model_class)
165
+ job_config.job = job_def
166
+
167
+ return job_config
168
+
169
+ finally:
170
+ self._model_loader.destroy_scope("DEV_MODE_TRANSLATION")
171
+ self._model_loader = None
172
+
173
+ def translate_job_def(
174
+ self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition,
175
+ model_class: tp.Optional[_api.TracModel.__class__] = None) \
176
+ -> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
177
+
178
+ if job_def.jobType is None or job_def.jobType == _meta.JobType.JOB_TYPE_NOT_SET:
179
+ job_def = self._process_job_type(job_def)
180
+
181
+ # Load and populate any models provided as a Python class or class name
182
+ job_config, job_def = self._process_models(job_config, job_def, model_class)
183
+
184
+ # For flows, load external flow definitions then perform auto-wiring and type inference
185
+ if job_def.jobType == _meta.JobType.RUN_FLOW:
186
+ job_config, job_def = self._process_flow_definition(job_config, job_def)
187
+
188
+ if job_def.jobType == _meta.JobType.JOB_GROUP:
189
+ job_config, job_def = self.translate_job_group(job_config, job_def)
190
+
191
+ # Apply processing to the parameters, inputs and outputs
192
+ job_config, job_def = self._process_parameters(job_config, job_def)
193
+ job_config, job_def = self._process_inputs_and_outputs(job_config, job_def)
194
+
195
+ return job_config, job_def
196
+
197
+ def translate_job_group(
198
+ self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
199
+ -> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
200
+
201
+ job_group = job_def.jobGroup
202
+
203
+ if job_group.jobGroupType is None or job_group.jobGroupType == _meta.JobGroupType.JOB_GROUP_TYPE_NOT_SET:
204
+ job_group = self._process_job_group_type(job_group)
205
+
206
+ group_details = self._get_job_group_detail(job_group)
207
+
208
+ if hasattr(group_details, "jobs"):
209
+ child_jobs = []
210
+ for child_def in group_details.jobs:
211
+ job_config, child_def = self.translate_job_def(job_config, child_def)
212
+ child_jobs.append(child_def)
213
+ group_details.jobs = child_jobs
214
+
215
+ job_def.jobGroup = job_group
216
+
217
+ return job_config, job_def
218
+
162
219
  @classmethod
163
220
  def _add_job_resource(
164
221
  cls, job_config: _cfg.JobConfig,
@@ -183,125 +240,153 @@ class DevModeTranslator:
183
240
  return translated_config
184
241
 
185
242
  @classmethod
186
- def _process_job_type(cls, job_config: _cfg.JobConfig):
243
+ def _process_job_type(cls, job_def: _meta.JobDefinition):
187
244
 
188
- if job_config.job.runModel is not None:
245
+ if job_def.runModel is not None:
189
246
  job_type = _meta.JobType.RUN_MODEL
190
247
 
191
- elif job_config.job.runFlow is not None:
248
+ elif job_def.runFlow is not None:
192
249
  job_type = _meta.JobType.RUN_FLOW
193
250
 
194
- elif job_config.job.importModel is not None:
251
+ elif job_def.importModel is not None:
195
252
  job_type = _meta.JobType.IMPORT_MODEL
196
253
 
197
- elif job_config.job.importData is not None:
254
+ elif job_def.importData is not None:
198
255
  job_type = _meta.JobType.IMPORT_DATA
199
256
 
200
- elif job_config.job.exportData is not None:
257
+ elif job_def.exportData is not None:
201
258
  job_type = _meta.JobType.EXPORT_DATA
202
259
 
260
+ elif job_def.jobGroup is not None:
261
+ job_type = _meta.JobType.JOB_GROUP
262
+
203
263
  else:
204
264
  cls._log.error("Could not infer job type")
205
265
  raise _ex.EConfigParse("Could not infer job type")
206
266
 
207
267
  cls._log.info(f"Inferred job type = [{job_type.name}]")
208
268
 
209
- job_def = copy.copy(job_config.job)
269
+ job_def = copy.copy(job_def)
210
270
  job_def.jobType = job_type
211
271
 
212
- job_config = copy.copy(job_config)
213
- job_config.job = job_def
272
+ return job_def
214
273
 
215
- return job_config
274
+ @classmethod
275
+ def _process_job_group_type(cls, job_group: _meta.JobGroup) -> _meta.JobGroup:
276
+
277
+ if job_group.sequential is not None:
278
+ job_group_type = _meta.JobGroupType.SEQUENTIAL_JOB_GROUP
279
+
280
+ elif job_group.parallel is not None:
281
+ job_group_type = _meta.JobGroupType.PARALLEL_JOB_GROUP
282
+
283
+ else:
284
+ cls._log.error("Could not infer job group type")
285
+ raise _ex.EConfigParse("Could not infer job group type")
286
+
287
+ cls._log.info(f"Inferred job group type = [{job_group_type.name}]")
288
+
289
+ job_group = copy.copy(job_group)
290
+ job_group.jobGroupType = job_group_type
291
+
292
+ return job_group
216
293
 
217
294
  @classmethod
218
- def _get_job_detail(cls, job_config: _cfg.JobConfig):
295
+ def _get_job_detail(cls, job_def: _meta.JobDefinition):
296
+
297
+ if job_def.jobType == _meta.JobType.RUN_MODEL:
298
+ return job_def.runModel
219
299
 
220
- if job_config.job.jobType == _meta.JobType.RUN_MODEL:
221
- return job_config.job.runModel
300
+ if job_def.jobType == _meta.JobType.RUN_FLOW:
301
+ return job_def.runFlow
222
302
 
223
- if job_config.job.jobType == _meta.JobType.RUN_FLOW:
224
- return job_config.job.runFlow
303
+ if job_def.jobType == _meta.JobType.IMPORT_MODEL:
304
+ return job_def.importModel
225
305
 
226
- if job_config.job.jobType == _meta.JobType.IMPORT_MODEL:
227
- return job_config.job.importModel
306
+ if job_def.jobType == _meta.JobType.IMPORT_DATA:
307
+ return job_def.importData
228
308
 
229
- if job_config.job.jobType == _meta.JobType.IMPORT_DATA:
230
- return job_config.job.importData
309
+ if job_def.jobType == _meta.JobType.EXPORT_DATA:
310
+ return job_def.exportData
231
311
 
232
- if job_config.job.jobType == _meta.JobType.EXPORT_DATA:
233
- return job_config.job.exportData
312
+ if job_def.jobType == _meta.JobType.JOB_GROUP:
313
+ return job_def.jobGroup
234
314
 
235
- raise _ex.EConfigParse(f"Could not get job details for job type [{job_config.job.jobType}]")
315
+ raise _ex.EConfigParse(f"Could not get job details for job type [{job_def.jobType}]")
236
316
 
237
317
  @classmethod
318
+ def _get_job_group_detail(cls, job_group: _meta.JobGroup):
319
+
320
+ if job_group.jobGroupType == _meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
321
+ return job_group.sequential
322
+
323
+ if job_group.jobGroupType == _meta.JobGroupType.PARALLEL_JOB_GROUP:
324
+ return job_group.parallel
325
+
326
+ raise _ex.EConfigParse(f"Could not get job group details for group type [{job_group.jobGroupType}]")
327
+
238
328
  def _process_models(
239
- cls,
240
- sys_config: _cfg.RuntimeConfig,
241
- job_config: _cfg.JobConfig,
242
- scratch_dir: pathlib.Path,
329
+ self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition,
243
330
  model_class: tp.Optional[_api.TracModel.__class__]) \
244
- -> _cfg.JobConfig:
245
-
246
- model_loader = _models.ModelLoader(sys_config, scratch_dir)
247
- model_loader.create_scope("DEV_MODE_TRANSLATION")
331
+ -> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
248
332
 
249
333
  # This processing works on the assumption that job details follow a convention for addressing models
250
334
  # Jobs requiring a single model have a field called "model"
251
335
  # Jobs requiring multiple models have a field called "models", which is a dict
252
336
 
253
- job_detail = cls._get_job_detail(job_config)
337
+ job_detail = self._get_job_detail(job_def)
254
338
 
255
339
  # If a model class is supplied in code, use that to generate the model def
256
340
  if model_class is not None:
257
341
 
258
342
  # Passing a model class via launch_model() is only supported for job types with a single model
259
343
  if not hasattr(job_detail, "model"):
260
- raise _ex.EJobValidation(f"Job type [{job_config.job.jobType}] cannot be launched using launch_model()")
344
+ raise _ex.EJobValidation(f"Job type [{job_def.jobType}] cannot be launched using launch_model()")
261
345
 
262
- model_id, model_obj = cls._generate_model_for_class(model_loader, model_class)
346
+ model_id, model_obj = self._generate_model_for_class(model_class)
263
347
  job_detail.model = _util.selector_for(model_id)
264
- job_config = cls._add_job_resource(job_config, model_id, model_obj)
348
+ job_config = self._add_job_resource(job_config, model_id, model_obj)
265
349
 
266
350
  # Otherwise look for models specified as a single string, and take that as the entry point
267
351
  else:
268
352
 
269
353
  # Jobs with a single model
270
354
  if hasattr(job_detail, "model") and isinstance(job_detail.model, str):
271
- model_id, model_obj = cls._generate_model_for_entry_point(model_loader, job_detail.model) # noqa
355
+ model_id, model_obj = self._generate_model_for_entry_point(job_detail.model) # noqa
272
356
  job_detail.model = _util.selector_for(model_id)
273
- job_config = cls._add_job_resource(job_config, model_id, model_obj)
357
+ job_config = self._add_job_resource(job_config, model_id, model_obj)
274
358
 
275
- # Jobs with multiple modlels
359
+ elif hasattr(job_detail, "model") and isinstance(job_detail.model, _meta.TagSelector):
360
+ if job_detail.model.objectType == _meta.ObjectType.OBJECT_TYPE_NOT_SET:
361
+ error = f"Missing required property [model] for job type [{job_def.jobType.name}]"
362
+ self._log.error(error)
363
+ raise _ex.EJobValidation(error)
364
+
365
+ # Jobs with multiple models
276
366
  elif hasattr(job_detail, "models") and isinstance(job_detail.models, dict):
277
367
  for model_key, model_detail in job_detail.models.items():
278
368
  if isinstance(model_detail, str):
279
- model_id, model_obj = cls._generate_model_for_entry_point(model_loader, model_detail)
369
+ model_id, model_obj = self._generate_model_for_entry_point(model_detail)
280
370
  job_detail.models[model_key] = _util.selector_for(model_id)
281
- job_config = cls._add_job_resource(job_config, model_id, model_obj)
282
-
283
- model_loader.destroy_scope("DEV_MODE_TRANSLATION")
371
+ job_config = self._add_job_resource(job_config, model_id, model_obj)
284
372
 
285
- return job_config
373
+ return job_config, job_def
286
374
 
287
- @classmethod
288
375
  def _generate_model_for_class(
289
- cls, model_loader: _models.ModelLoader, model_class: _api.TracModel.__class__) \
376
+ self, model_class: _api.TracModel.__class__) \
290
377
  -> (_meta.TagHeader, _meta.ObjectDefinition):
291
378
 
292
379
  model_entry_point = f"{model_class.__module__}.{model_class.__name__}"
380
+ return self._generate_model_for_entry_point(model_entry_point)
293
381
 
294
- return cls._generate_model_for_entry_point(model_loader, model_entry_point)
295
-
296
- @classmethod
297
382
  def _generate_model_for_entry_point(
298
- cls, model_loader: _models.ModelLoader, model_entry_point: str) \
383
+ self, model_entry_point: str) \
299
384
  -> (_meta.TagHeader, _meta.ObjectDefinition):
300
385
 
301
386
  model_id = _util.new_object_id(_meta.ObjectType.MODEL)
302
387
  model_key = _util.object_key(model_id)
303
388
 
304
- cls._log.info(f"Generating model definition for [{model_entry_point}] with ID = [{model_key}]")
389
+ self._log.info(f"Generating model definition for [{model_entry_point}] with ID = [{model_key}]")
305
390
 
306
391
  skeleton_modeL_def = _meta.ModelDefinition( # noqa
307
392
  language="python",
@@ -312,8 +397,8 @@ class DevModeTranslator:
312
397
  inputs={},
313
398
  outputs={})
314
399
 
315
- model_class = model_loader.load_model_class("DEV_MODE_TRANSLATION", skeleton_modeL_def)
316
- model_def = model_loader.scan_model(skeleton_modeL_def, model_class)
400
+ model_class = self._model_loader.load_model_class("DEV_MODE_TRANSLATION", skeleton_modeL_def)
401
+ model_def = self._model_loader.scan_model(skeleton_modeL_def, model_class)
317
402
 
318
403
  model_object = _meta.ObjectDefinition(
319
404
  objectType=_meta.ObjectType.MODEL,
@@ -321,56 +406,57 @@ class DevModeTranslator:
321
406
 
322
407
  return model_id, model_object
323
408
 
324
- @classmethod
325
- def _process_flow_definition(cls, job_config: _cfg.JobConfig, config_mgr: _cfg_p.ConfigManager) -> _cfg.JobConfig:
409
+ def _process_flow_definition(
410
+ self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
411
+ -> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
326
412
 
327
- flow_details = job_config.job.runFlow.flow
413
+ flow_details = job_def.runFlow.flow
328
414
 
329
415
  # Do not apply translation if flow is specified as an object ID / selector (assume full config is supplied)
330
416
  if isinstance(flow_details, _meta.TagHeader) or isinstance(flow_details, _meta.TagSelector):
331
- return job_config
417
+ return job_config, job_def
332
418
 
333
419
  # Otherwise, flow is specified as the path to dev-mode flow definition
334
420
  if not isinstance(flow_details, str):
335
421
  err = f"Invalid config value for [job.runFlow.flow]: Expected path or tag selector, got [{flow_details}])"
336
- cls._log.error(err)
422
+ self._log.error(err)
337
423
  raise _ex.EConfigParse(err)
338
424
 
339
425
  flow_id = _util.new_object_id(_meta.ObjectType.FLOW)
340
426
  flow_key = _util.object_key(flow_id)
341
427
 
342
- cls._log.info(f"Generating flow definition from [{flow_details}] with ID = [{flow_key}]")
428
+ self._log.info(f"Generating flow definition from [{flow_details}] with ID = [{flow_key}]")
343
429
 
344
- flow_def = config_mgr.load_config_object(flow_details, _meta.FlowDefinition)
430
+ flow_def = self._config_mgr.load_config_object(flow_details, _meta.FlowDefinition)
345
431
 
346
432
  # Validate models against the flow (this could move to _impl.validation and check prod jobs as well)
347
- cls._check_models_for_flow(flow_def, job_config)
433
+ self._check_models_for_flow(flow_def, job_def, job_config)
348
434
 
349
435
  # Auto-wiring and inference only applied to externally loaded flows for now
350
- flow_def = cls._autowire_flow(flow_def, job_config)
351
- flow_def = cls._apply_type_inference(flow_def, job_config)
436
+ flow_def = self._autowire_flow(flow_def, job_def, job_config)
437
+ flow_def = self._apply_type_inference(flow_def, job_def, job_config)
352
438
 
353
439
  flow_obj = _meta.ObjectDefinition(
354
440
  objectType=_meta.ObjectType.FLOW,
355
441
  flow=flow_def)
356
442
 
443
+ job_def = copy.copy(job_def)
444
+ job_def.runFlow = copy.copy(job_def.runFlow)
445
+ job_def.runFlow.flow = _util.selector_for(flow_id)
446
+
357
447
  job_config = copy.copy(job_config)
358
- job_config.job = copy.copy(job_config.job)
359
- job_config.job.runFlow = copy.copy(job_config.job.runFlow)
360
448
  job_config.resources = copy.copy(job_config.resources)
449
+ job_config = self._add_job_resource(job_config, flow_id, flow_obj)
361
450
 
362
- job_config = cls._add_job_resource(job_config, flow_id, flow_obj)
363
- job_config.job.runFlow.flow = _util.selector_for(flow_id)
364
-
365
- return job_config
451
+ return job_config, job_def
366
452
 
367
453
  @classmethod
368
- def _check_models_for_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
454
+ def _check_models_for_flow(cls, flow: _meta.FlowDefinition, job_def: _meta.JobDefinition, job_config: _cfg.JobConfig):
369
455
 
370
456
  model_nodes = dict(filter(lambda n: n[1].nodeType == _meta.FlowNodeType.MODEL_NODE, flow.nodes.items()))
371
457
 
372
- missing_models = list(filter(lambda m: m not in job_config.job.runFlow.models, model_nodes.keys()))
373
- extra_models = list(filter(lambda m: m not in model_nodes, job_config.job.runFlow.models.keys()))
458
+ missing_models = list(filter(lambda m: m not in job_def.runFlow.models, model_nodes.keys()))
459
+ extra_models = list(filter(lambda m: m not in model_nodes, job_def.runFlow.models.keys()))
374
460
 
375
461
  if any(missing_models):
376
462
  error = f"Missing models in job definition: {', '.join(missing_models)}"
@@ -384,7 +470,7 @@ class DevModeTranslator:
384
470
 
385
471
  for model_name, model_node in model_nodes.items():
386
472
 
387
- model_selector = job_config.job.runFlow.models[model_name]
473
+ model_selector = job_def.runFlow.models[model_name]
388
474
  model_obj = _util.get_job_resource(model_selector, job_config)
389
475
 
390
476
  model_inputs = set(model_obj.model.inputs.keys())
@@ -396,9 +482,9 @@ class DevModeTranslator:
396
482
  raise _ex.EJobValidation(error)
397
483
 
398
484
  @classmethod
399
- def _autowire_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
485
+ def _autowire_flow(cls, flow: _meta.FlowDefinition, job_def: _meta.JobDefinition, job_config: _cfg.JobConfig):
400
486
 
401
- job = job_config.job.runFlow
487
+ job = job_def.runFlow
402
488
  nodes = copy.copy(flow.nodes)
403
489
  edges: tp.Dict[str, _meta.FlowEdge] = dict()
404
490
 
@@ -485,7 +571,10 @@ class DevModeTranslator:
485
571
  return autowired_flow
486
572
 
487
573
  @classmethod
488
- def _apply_type_inference(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig) -> _meta.FlowDefinition:
574
+ def _apply_type_inference(
575
+ cls, flow: _meta.FlowDefinition,
576
+ job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
577
+ -> _meta.FlowDefinition:
489
578
 
490
579
  updated_flow = copy.copy(flow)
491
580
  updated_flow.parameters = copy.copy(flow.parameters)
@@ -506,17 +595,17 @@ class DevModeTranslator:
506
595
 
507
596
  if node.nodeType == _meta.FlowNodeType.PARAMETER_NODE and node_name not in flow.parameters:
508
597
  targets = edges_by_source.get(node_name) or []
509
- model_parameter = cls._infer_parameter(node_name, targets, job_config)
598
+ model_parameter = cls._infer_parameter(node_name, targets, job_def, job_config)
510
599
  updated_flow.parameters[node_name] = model_parameter
511
600
 
512
601
  if node.nodeType == _meta.FlowNodeType.INPUT_NODE and node_name not in flow.inputs:
513
602
  targets = edges_by_source.get(node_name) or []
514
- model_input = cls._infer_input_schema(node_name, targets, job_config)
603
+ model_input = cls._infer_input_schema(node_name, targets, job_def, job_config)
515
604
  updated_flow.inputs[node_name] = model_input
516
605
 
517
606
  if node.nodeType == _meta.FlowNodeType.OUTPUT_NODE and node_name not in flow.outputs:
518
607
  sources = edges_by_target.get(node_name) or []
519
- model_output = cls._infer_output_schema(node_name, sources, job_config)
608
+ model_output = cls._infer_output_schema(node_name, sources, job_def, job_config)
520
609
  updated_flow.outputs[node_name] = model_output
521
610
 
522
611
  return updated_flow
@@ -524,13 +613,14 @@ class DevModeTranslator:
524
613
  @classmethod
525
614
  def _infer_parameter(
526
615
  cls, param_name: str, targets: tp.List[_meta.FlowSocket],
527
- job_config: _cfg.JobConfig) -> _meta.ModelParameter:
616
+ job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
617
+ -> _meta.ModelParameter:
528
618
 
529
619
  model_params = []
530
620
 
531
621
  for target in targets:
532
622
 
533
- model_selector = job_config.job.runFlow.models.get(target.node)
623
+ model_selector = job_def.runFlow.models.get(target.node)
534
624
  model_obj = _util.get_job_resource(model_selector, job_config)
535
625
  model_param = model_obj.model.parameters.get(target.socket)
536
626
  model_params.append(model_param)
@@ -560,13 +650,14 @@ class DevModeTranslator:
560
650
  @classmethod
561
651
  def _infer_input_schema(
562
652
  cls, input_name: str, targets: tp.List[_meta.FlowSocket],
563
- job_config: _cfg.JobConfig) -> _meta.ModelInputSchema:
653
+ job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
654
+ -> _meta.ModelInputSchema:
564
655
 
565
656
  model_inputs = []
566
657
 
567
658
  for target in targets:
568
659
 
569
- model_selector = job_config.job.runFlow.models.get(target.node)
660
+ model_selector = job_def.runFlow.models.get(target.node)
570
661
  model_obj = _util.get_job_resource(model_selector, job_config)
571
662
  model_input = model_obj.model.inputs.get(target.socket)
572
663
  model_inputs.append(model_input)
@@ -594,13 +685,14 @@ class DevModeTranslator:
594
685
  @classmethod
595
686
  def _infer_output_schema(
596
687
  cls, output_name: str, sources: tp.List[_meta.FlowSocket],
597
- job_config: _cfg.JobConfig) -> _meta.ModelOutputSchema:
688
+ job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
689
+ -> _meta.ModelOutputSchema:
598
690
 
599
691
  model_outputs = []
600
692
 
601
693
  for source in sources:
602
694
 
603
- model_selector = job_config.job.runFlow.models.get(source.node)
695
+ model_selector = job_def.runFlow.models.get(source.node)
604
696
  model_obj = _util.get_job_resource(model_selector, job_config)
605
697
  model_input = model_obj.model.inputs.get(source.socket)
606
698
  model_outputs.append(model_input)
@@ -624,11 +716,13 @@ class DevModeTranslator:
624
716
  return f"{socket.node}.{socket.socket}" if socket.socket else socket.node
625
717
 
626
718
  @classmethod
627
- def _process_parameters(cls, job_config: _cfg.JobConfig) -> _cfg.JobConfig:
719
+ def _process_parameters(
720
+ cls, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
721
+ -> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
628
722
 
629
723
  # This relies on convention for naming properties across similar job types
630
724
 
631
- job_detail = cls._get_job_detail(job_config)
725
+ job_detail = cls._get_job_detail(job_def)
632
726
 
633
727
  if hasattr(job_detail, "model"):
634
728
  model_key = _util.object_key(job_detail.model)
@@ -646,7 +740,7 @@ class DevModeTranslator:
646
740
 
647
741
  job_detail.parameters = cls._process_parameters_dict(param_specs, raw_values)
648
742
 
649
- return job_config
743
+ return job_config, job_def
650
744
 
651
745
  @classmethod
652
746
  def _process_parameters_dict(
@@ -677,10 +771,11 @@ class DevModeTranslator:
677
771
 
678
772
  return encoded_values
679
773
 
680
- @classmethod
681
- def _process_inputs_and_outputs(cls, sys_config: _cfg.RuntimeConfig, job_config: _cfg.JobConfig) -> _cfg.JobConfig:
774
+ def _process_inputs_and_outputs(
775
+ self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
776
+ -> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
682
777
 
683
- job_detail = cls._get_job_detail(job_config)
778
+ job_detail = self._get_job_detail(job_def)
684
779
 
685
780
  if hasattr(job_detail, "model"):
686
781
  model_obj = _util.get_job_resource(job_detail.model, job_config)
@@ -693,7 +788,7 @@ class DevModeTranslator:
693
788
  required_outputs = flow_obj.flow.outputs
694
789
 
695
790
  else:
696
- return job_config
791
+ return job_config, job_def
697
792
 
698
793
  job_inputs = job_detail.inputs
699
794
  job_outputs = job_detail.outputs
@@ -705,8 +800,8 @@ class DevModeTranslator:
705
800
  model_input = required_inputs[input_key]
706
801
  input_schema = model_input.schema if model_input and not model_input.dynamic else None
707
802
 
708
- input_id = cls._process_input_or_output(
709
- sys_config, input_key, input_value, job_resources,
803
+ input_id = self._process_input_or_output(
804
+ input_key, input_value, job_resources,
710
805
  new_unique_file=False, schema=input_schema)
711
806
 
712
807
  job_inputs[input_key] = _util.selector_for(input_id)
@@ -717,17 +812,16 @@ class DevModeTranslator:
717
812
  model_output= required_outputs[output_key]
718
813
  output_schema = model_output.schema if model_output and not model_output.dynamic else None
719
814
 
720
- output_id = cls._process_input_or_output(
721
- sys_config, output_key, output_value, job_resources,
815
+ output_id = self._process_input_or_output(
816
+ output_key, output_value, job_resources,
722
817
  new_unique_file=True, schema=output_schema)
723
818
 
724
819
  job_outputs[output_key] = _util.selector_for(output_id)
725
820
 
726
- return job_config
821
+ return job_config, job_def
727
822
 
728
- @classmethod
729
823
  def _process_input_or_output(
730
- cls, sys_config, data_key, data_value,
824
+ self, data_key, data_value,
731
825
  resources: tp.Dict[str, _meta.ObjectDefinition],
732
826
  new_unique_file=False,
733
827
  schema: tp.Optional[_meta.SchemaDefinition] = None) \
@@ -738,8 +832,8 @@ class DevModeTranslator:
738
832
 
739
833
  if isinstance(data_value, str):
740
834
  storage_path = data_value
741
- storage_key = sys_config.storage.defaultBucket
742
- storage_format = cls.infer_format(storage_path, sys_config.storage)
835
+ storage_key = self._sys_config.storage.defaultBucket
836
+ storage_format = self.infer_format(storage_path, self._sys_config.storage)
743
837
  snap_version = 1
744
838
 
745
839
  elif isinstance(data_value, dict):
@@ -749,14 +843,14 @@ class DevModeTranslator:
749
843
  if not storage_path:
750
844
  raise _ex.EConfigParse(f"Invalid configuration for input [{data_key}] (missing required value 'path'")
751
845
 
752
- storage_key = data_value.get("storageKey") or sys_config.storage.defaultBucket
753
- storage_format = data_value.get("format") or cls.infer_format(storage_path, sys_config.storage)
846
+ storage_key = data_value.get("storageKey") or self._sys_config.storage.defaultBucket
847
+ storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config.storage)
754
848
  snap_version = 1
755
849
 
756
850
  else:
757
851
  raise _ex.EConfigParse(f"Invalid configuration for input '{data_key}'")
758
852
 
759
- cls._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
853
+ self._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
760
854
 
761
855
  # For unique outputs, increment the snap number to find a new unique snap
762
856
  # These are not incarnations, bc likely in dev mode model code and inputs are changing
@@ -764,7 +858,7 @@ class DevModeTranslator:
764
858
 
765
859
  if new_unique_file:
766
860
 
767
- x_storage_mgr = _storage.StorageManager(sys_config)
861
+ x_storage_mgr = _storage.StorageManager(self._sys_config)
768
862
  x_storage = x_storage_mgr.get_file_storage(storage_key)
769
863
  x_orig_path = pathlib.PurePath(storage_path)
770
864
  x_name = x_orig_path.name
@@ -781,9 +875,9 @@ class DevModeTranslator:
781
875
  x_name = f"{x_orig_path.stem}-{snap_version}"
782
876
  storage_path = str(x_orig_path.parent.joinpath(x_name))
783
877
 
784
- cls._log.info(f"Output for [{data_key}] will be snap version {snap_version}")
878
+ self._log.info(f"Output for [{data_key}] will be snap version {snap_version}")
785
879
 
786
- data_obj, storage_obj = cls._generate_input_definition(
880
+ data_obj, storage_obj = self._generate_input_definition(
787
881
  data_id, storage_id, storage_key, storage_path, storage_format,
788
882
  snap_index=snap_version, delta_index=1, incarnation_index=1,
789
883
  schema=schema)