tracdap-runtime 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (41)
  1. tracdap/rt/_exec/context.py +556 -36
  2. tracdap/rt/_exec/dev_mode.py +320 -198
  3. tracdap/rt/_exec/engine.py +331 -62
  4. tracdap/rt/_exec/functions.py +151 -22
  5. tracdap/rt/_exec/graph.py +47 -13
  6. tracdap/rt/_exec/graph_builder.py +383 -175
  7. tracdap/rt/_exec/runtime.py +7 -5
  8. tracdap/rt/_impl/config_parser.py +11 -4
  9. tracdap/rt/_impl/data.py +329 -152
  10. tracdap/rt/_impl/ext/__init__.py +13 -0
  11. tracdap/rt/_impl/ext/sql.py +116 -0
  12. tracdap/rt/_impl/ext/storage.py +57 -0
  13. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +82 -30
  14. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +155 -2
  15. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +12 -10
  16. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -2
  17. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +29 -0
  18. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +16 -0
  19. tracdap/rt/_impl/models.py +8 -0
  20. tracdap/rt/_impl/static_api.py +29 -0
  21. tracdap/rt/_impl/storage.py +39 -27
  22. tracdap/rt/_impl/util.py +10 -0
  23. tracdap/rt/_impl/validation.py +140 -18
  24. tracdap/rt/_plugins/repo_git.py +1 -1
  25. tracdap/rt/_plugins/storage_sql.py +417 -0
  26. tracdap/rt/_plugins/storage_sql_dialects.py +117 -0
  27. tracdap/rt/_version.py +1 -1
  28. tracdap/rt/api/experimental.py +267 -0
  29. tracdap/rt/api/hook.py +14 -0
  30. tracdap/rt/api/model_api.py +48 -6
  31. tracdap/rt/config/__init__.py +2 -2
  32. tracdap/rt/config/common.py +6 -0
  33. tracdap/rt/metadata/__init__.py +29 -20
  34. tracdap/rt/metadata/job.py +99 -0
  35. tracdap/rt/metadata/model.py +18 -0
  36. tracdap/rt/metadata/resource.py +24 -0
  37. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/METADATA +5 -1
  38. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/RECORD +41 -32
  39. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/WHEEL +1 -1
  40. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/LICENSE +0 -0
  41. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/top_level.txt +0 -0
@@ -12,8 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from __future__ import annotations
16
-
17
15
  import re
18
16
  import typing as tp
19
17
  import copy
@@ -31,12 +29,20 @@ import tracdap.rt._impl.util as _util # noqa
31
29
 
32
30
 
33
31
  DEV_MODE_JOB_CONFIG = [
34
- re.compile(r"job\.run(Model|Flow)\.parameters\.\w+"),
35
- re.compile(r"job\.run(Model|Flow)\.inputs\.\w+"),
36
- re.compile(r"job\.run(Model|Flow)\.outputs\.\w+"),
37
- re.compile(r"job\.runModel\.model"),
38
- re.compile(r"job\.runFlow\.flow"),
39
- re.compile(r"job\.runFlow\.models\.\w+")]
32
+ re.compile(r"job\.\w+\.parameters\.\w+"),
33
+ re.compile(r"job\.\w+\.inputs\.\w+"),
34
+ re.compile(r"job\.\w+\.outputs\.\w+"),
35
+ re.compile(r"job\.\w+\.models\.\w+"),
36
+ re.compile(r"job\.\w+\.model"),
37
+ re.compile(r"job\.\w+\.flow"),
38
+
39
+ re.compile(r".*\.jobs\.\d+\.\w+\.parameters\.\w+"),
40
+ re.compile(r".*\.jobs\.\d+\.\w+\.inputs\.\w+"),
41
+ re.compile(r".*\.jobs\.\d+\.\w+\.outputs\.\w+"),
42
+ re.compile(r".*\.jobs\.\d+\.\w+\.models\.\w+"),
43
+ re.compile(r".*\.jobs\.\d+\.\w+\.model"),
44
+ re.compile(r".*\.jobs\.\d+\.\w+\.flow")
45
+ ]
40
46
 
41
47
  DEV_MODE_SYS_CONFIG = []
42
48
 
@@ -56,43 +62,10 @@ class DevModeTranslator:
56
62
  sys_config.storage = _cfg.StorageConfig()
57
63
 
58
64
  sys_config = cls._add_integrated_repo(sys_config)
59
- sys_config = cls._resolve_relative_storage_root(sys_config, config_mgr)
65
+ sys_config = cls._process_storage(sys_config, config_mgr)
60
66
 
61
67
  return sys_config
62
68
 
63
- @classmethod
64
- def translate_job_config(
65
- cls,
66
- sys_config: _cfg.RuntimeConfig,
67
- job_config: _cfg.JobConfig,
68
- scratch_dir: pathlib.Path,
69
- config_mgr: _cfg_p.ConfigManager,
70
- model_class: tp.Optional[_api.TracModel.__class__]) \
71
- -> _cfg.JobConfig:
72
-
73
- cls._log.info(f"Applying dev mode config translation to job config")
74
-
75
- if not job_config.jobId:
76
- job_config = cls._process_job_id(job_config)
77
-
78
- if job_config.job.jobType is None or job_config.job.jobType == _meta.JobType.JOB_TYPE_NOT_SET:
79
- job_config = cls._process_job_type(job_config)
80
-
81
- # Load and populate any models provided as a Python class or class name
82
- if job_config.job.jobType in [_meta.JobType.RUN_MODEL, _meta.JobType.RUN_FLOW]:
83
- job_config = cls._process_models(sys_config, job_config, scratch_dir, model_class)
84
-
85
- # Fow flows, load external flow definitions then perform auto-wiring and type inference
86
- if job_config.job.jobType == _meta.JobType.RUN_FLOW:
87
- job_config = cls._process_flow_definition(job_config, config_mgr)
88
-
89
- # For run (model|flow) jobs, apply processing to the parameters, inputs and outputs
90
- if job_config.job.jobType in [_meta.JobType.RUN_MODEL, _meta.JobType.RUN_FLOW]:
91
- job_config = cls._process_parameters(job_config)
92
- job_config = cls._process_inputs_and_outputs(sys_config, job_config)
93
-
94
- return job_config
95
-
96
69
  @classmethod
97
70
  def _add_integrated_repo(cls, sys_config: _cfg.RuntimeConfig) -> _cfg.RuntimeConfig:
98
71
 
@@ -107,51 +80,140 @@ class DevModeTranslator:
107
80
  return sys_config
108
81
 
109
82
  @classmethod
110
- def _resolve_relative_storage_root(
83
+ def _process_storage(
111
84
  cls, sys_config: _cfg.RuntimeConfig,
112
85
  config_mgr: _cfg_p.ConfigManager):
113
86
 
114
87
  storage_config = copy.deepcopy(sys_config.storage)
115
88
 
116
89
  for bucket_key, bucket_config in storage_config.buckets.items():
90
+ storage_config.buckets[bucket_key] = cls._resolve_storage_location(
91
+ bucket_key, bucket_config, config_mgr)
117
92
 
118
- if bucket_config.protocol != "LOCAL":
119
- continue
93
+ for bucket_key, bucket_config in storage_config.external.items():
94
+ storage_config.external[bucket_key] = cls._resolve_storage_location(
95
+ bucket_key, bucket_config, config_mgr)
120
96
 
121
- if "rootPath" not in bucket_config.properties:
122
- continue
97
+ sys_config = copy.copy(sys_config)
98
+ sys_config.storage = storage_config
123
99
 
124
- root_path = pathlib.Path(bucket_config.properties["rootPath"])
100
+ return sys_config
101
+
102
+ @classmethod
103
+ def _resolve_storage_location(cls, bucket_key, bucket_config, config_mgr: _cfg_p.ConfigManager):
125
104
 
126
- if root_path.is_absolute():
127
- continue
105
+ if bucket_config.protocol != "LOCAL":
106
+ return bucket_config
128
107
 
129
- cls._log.info(f"Resolving relative path for [{bucket_key}] local storage...")
108
+ if "rootPath" not in bucket_config.properties:
109
+ return bucket_config
130
110
 
131
- sys_config_path = config_mgr.config_dir_path()
132
- if sys_config_path is not None:
133
- absolute_path = sys_config_path.joinpath(root_path).resolve()
134
- if absolute_path.exists():
135
- cls._log.info(f"Resolved [{root_path}] -> [{absolute_path}]")
136
- bucket_config.properties["rootPath"] = str(absolute_path)
137
- continue
111
+ root_path = pathlib.Path(bucket_config.properties["rootPath"])
138
112
 
139
- cwd = pathlib.Path.cwd()
140
- absolute_path = cwd.joinpath(root_path).resolve()
113
+ if root_path.is_absolute():
114
+ return bucket_config
141
115
 
116
+ cls._log.info(f"Resolving relative path for [{bucket_key}] local storage...")
117
+
118
+ sys_config_path = config_mgr.config_dir_path()
119
+ if sys_config_path is not None:
120
+ absolute_path = sys_config_path.joinpath(root_path).resolve()
142
121
  if absolute_path.exists():
143
122
  cls._log.info(f"Resolved [{root_path}] -> [{absolute_path}]")
144
123
  bucket_config.properties["rootPath"] = str(absolute_path)
145
- continue
124
+ return bucket_config
146
125
 
147
- msg = f"Failed to resolve relative storage path [{root_path}]"
148
- cls._log.error(msg)
149
- raise _ex.EConfigParse(msg)
126
+ cwd = pathlib.Path.cwd()
127
+ absolute_path = cwd.joinpath(root_path).resolve()
150
128
 
151
- sys_config = copy.copy(sys_config)
152
- sys_config.storage = storage_config
129
+ if absolute_path.exists():
130
+ cls._log.info(f"Resolved [{root_path}] -> [{absolute_path}]")
131
+ bucket_config.properties["rootPath"] = str(absolute_path)
132
+ return bucket_config
153
133
 
154
- return sys_config
134
+ msg = f"Failed to resolve relative storage path [{root_path}]"
135
+ cls._log.error(msg)
136
+ raise _ex.EConfigParse(msg)
137
+
138
+
139
+ def __init__(self, sys_config: _cfg.RuntimeConfig, config_mgr: _cfg_p.ConfigManager, scratch_dir: pathlib.Path):
140
+ self._sys_config = sys_config
141
+ self._config_mgr = config_mgr
142
+ self._scratch_dir = scratch_dir
143
+ self._model_loader: tp.Optional[_models.ModelLoader] = None
144
+
145
+ def translate_job_config(
146
+ self, job_config: _cfg.JobConfig,
147
+ model_class: tp.Optional[_api.TracModel.__class__] = None) \
148
+ -> _cfg.JobConfig:
149
+
150
+ try:
151
+ self._log.info(f"Applying dev mode config translation to job config")
152
+
153
+ self._model_loader = _models.ModelLoader(self._sys_config, self._scratch_dir)
154
+ self._model_loader.create_scope("DEV_MODE_TRANSLATION")
155
+
156
+ job_config = copy.deepcopy(job_config)
157
+ job_def = job_config.job
158
+
159
+ # Protobuf semantics for a blank jobId should be an object, but objectId will be an empty string
160
+ if not job_config.jobId or not job_config.jobId.objectId:
161
+ job_config = self._process_job_id(job_config)
162
+
163
+ job_config, job_def = self.translate_job_def(job_config, job_def, model_class)
164
+ job_config.job = job_def
165
+
166
+ return job_config
167
+
168
+ finally:
169
+ self._model_loader.destroy_scope("DEV_MODE_TRANSLATION")
170
+ self._model_loader = None
171
+
172
+ def translate_job_def(
173
+ self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition,
174
+ model_class: tp.Optional[_api.TracModel.__class__] = None) \
175
+ -> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
176
+
177
+ if job_def.jobType is None or job_def.jobType == _meta.JobType.JOB_TYPE_NOT_SET:
178
+ job_def = self._process_job_type(job_def)
179
+
180
+ # Load and populate any models provided as a Python class or class name
181
+ job_config, job_def = self._process_models(job_config, job_def, model_class)
182
+
183
+ # Fow flows, load external flow definitions then perform auto-wiring and type inference
184
+ if job_def.jobType == _meta.JobType.RUN_FLOW:
185
+ job_config, job_def = self._process_flow_definition(job_config, job_def)
186
+
187
+ if job_def.jobType == _meta.JobType.JOB_GROUP:
188
+ job_config, job_def = self.translate_job_group(job_config, job_def)
189
+
190
+ # Apply processing to the parameters, inputs and outputs
191
+ job_config, job_def = self._process_parameters(job_config, job_def)
192
+ job_config, job_def = self._process_inputs_and_outputs(job_config, job_def)
193
+
194
+ return job_config, job_def
195
+
196
+ def translate_job_group(
197
+ self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
198
+ -> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
199
+
200
+ job_group = job_def.jobGroup
201
+
202
+ if job_group.jobGroupType is None or job_group.jobGroupType == _meta.JobGroupType.JOB_GROUP_TYPE_NOT_SET:
203
+ job_group = self._process_job_group_type(job_group)
204
+
205
+ group_details = self._get_job_group_detail(job_group)
206
+
207
+ if hasattr(group_details, "jobs"):
208
+ child_jobs = []
209
+ for child_def in group_details.jobs:
210
+ job_config, child_def = self.translate_job_def(job_config, child_def)
211
+ child_jobs.append(child_def)
212
+ group_details.jobs = child_jobs
213
+
214
+ job_def.jobGroup = job_group
215
+
216
+ return job_config, job_def
155
217
 
156
218
  @classmethod
157
219
  def _add_job_resource(
@@ -177,101 +239,153 @@ class DevModeTranslator:
177
239
  return translated_config
178
240
 
179
241
  @classmethod
180
- def _process_job_type(cls, job_config: _cfg.JobConfig):
242
+ def _process_job_type(cls, job_def: _meta.JobDefinition):
181
243
 
182
- if job_config.job.runModel is not None:
244
+ if job_def.runModel is not None:
183
245
  job_type = _meta.JobType.RUN_MODEL
184
246
 
185
- elif job_config.job.runFlow is not None:
247
+ elif job_def.runFlow is not None:
186
248
  job_type = _meta.JobType.RUN_FLOW
187
249
 
188
- elif job_config.job.importModel is not None:
250
+ elif job_def.importModel is not None:
189
251
  job_type = _meta.JobType.IMPORT_MODEL
190
252
 
253
+ elif job_def.importData is not None:
254
+ job_type = _meta.JobType.IMPORT_DATA
255
+
256
+ elif job_def.exportData is not None:
257
+ job_type = _meta.JobType.EXPORT_DATA
258
+
259
+ elif job_def.jobGroup is not None:
260
+ job_type = _meta.JobType.JOB_GROUP
261
+
191
262
  else:
192
263
  cls._log.error("Could not infer job type")
193
264
  raise _ex.EConfigParse("Could not infer job type")
194
265
 
195
266
  cls._log.info(f"Inferred job type = [{job_type.name}]")
196
267
 
197
- job_def = copy.copy(job_config.job)
268
+ job_def = copy.copy(job_def)
198
269
  job_def.jobType = job_type
199
270
 
200
- job_config = copy.copy(job_config)
201
- job_config.job = job_def
202
-
203
- return job_config
271
+ return job_def
204
272
 
205
273
  @classmethod
206
- def _process_models(
207
- cls,
208
- sys_config: _cfg.RuntimeConfig,
209
- job_config: _cfg.JobConfig,
210
- scratch_dir: pathlib.Path,
211
- model_class: tp.Optional[_api.TracModel.__class__]) \
212
- -> _cfg.JobConfig:
274
+ def _process_job_group_type(cls, job_group: _meta.JobGroup) -> _meta.JobGroup:
213
275
 
214
- model_loader = _models.ModelLoader(sys_config, scratch_dir)
215
- model_loader.create_scope("DEV_MODE_TRANSLATION")
276
+ if job_group.sequential is not None:
277
+ job_group_type = _meta.JobGroupType.SEQUENTIAL_JOB_GROUP
216
278
 
217
- original_config = job_config
279
+ elif job_group.parallel is not None:
280
+ job_group_type = _meta.JobGroupType.PARALLEL_JOB_GROUP
218
281
 
219
- job_config = copy.copy(job_config)
220
- job_config.job = copy.copy(job_config.job)
221
- job_config.resources = copy.copy(job_config.resources)
282
+ else:
283
+ cls._log.error("Could not infer job group type")
284
+ raise _ex.EConfigParse("Could not infer job group type")
222
285
 
223
- if job_config.job.jobType == _meta.JobType.RUN_MODEL:
286
+ cls._log.info(f"Inferred job group type = [{job_group_type.name}]")
224
287
 
225
- job_config.job.runModel = copy.copy(job_config.job.runModel)
288
+ job_group = copy.copy(job_group)
289
+ job_group.jobGroupType = job_group_type
226
290
 
227
- # If a model class is supplied in code, use that to generate the model def
228
- if model_class is not None:
229
- model_id, model_obj = cls._generate_model_for_class(model_loader, model_class)
230
- job_config = cls._add_job_resource(job_config, model_id, model_obj)
231
- job_config.job.runModel.model = _util.selector_for(model_id)
291
+ return job_group
232
292
 
233
- # Otherwise if model specified as a string instead of a selector, apply the translation
234
- elif isinstance(original_config.job.runModel.model, str):
235
- model_detail = original_config.job.runModel.model
236
- model_id, model_obj = cls._generate_model_for_entry_point(model_loader, model_detail) # noqa
237
- job_config = cls._add_job_resource(job_config, model_id, model_obj)
238
- job_config.job.runModel.model = _util.selector_for(model_id)
293
+ @classmethod
294
+ def _get_job_detail(cls, job_def: _meta.JobDefinition):
239
295
 
240
- if job_config.job.jobType == _meta.JobType.RUN_FLOW:
296
+ if job_def.jobType == _meta.JobType.RUN_MODEL:
297
+ return job_def.runModel
241
298
 
242
- job_config.job.runFlow = copy.copy(job_config.job.runFlow)
243
- job_config.job.runFlow.models = copy.copy(job_config.job.runFlow.models)
299
+ if job_def.jobType == _meta.JobType.RUN_FLOW:
300
+ return job_def.runFlow
244
301
 
245
- for model_key, model_detail in original_config.job.runFlow.models.items():
302
+ if job_def.jobType == _meta.JobType.IMPORT_MODEL:
303
+ return job_def.importModel
246
304
 
247
- # Only apply translation if the model is specified as a string instead of a selector
248
- if isinstance(model_detail, str):
249
- model_id, model_obj = cls._generate_model_for_entry_point(model_loader, model_detail)
250
- job_config = cls._add_job_resource(job_config, model_id, model_obj)
251
- job_config.job.runFlow.models[model_key] = _util.selector_for(model_id)
305
+ if job_def.jobType == _meta.JobType.IMPORT_DATA:
306
+ return job_def.importData
252
307
 
253
- model_loader.destroy_scope("DEV_MODE_TRANSLATION")
308
+ if job_def.jobType == _meta.JobType.EXPORT_DATA:
309
+ return job_def.exportData
254
310
 
255
- return job_config
311
+ if job_def.jobType == _meta.JobType.JOB_GROUP:
312
+ return job_def.jobGroup
313
+
314
+ raise _ex.EConfigParse(f"Could not get job details for job type [{job_def.jobType}]")
256
315
 
257
316
  @classmethod
317
+ def _get_job_group_detail(cls, job_group: _meta.JobGroup):
318
+
319
+ if job_group.jobGroupType == _meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
320
+ return job_group.sequential
321
+
322
+ if job_group.jobGroupType == _meta.JobGroupType.PARALLEL_JOB_GROUP:
323
+ return job_group.parallel
324
+
325
+ raise _ex.EConfigParse(f"Could not get job group details for group type [{job_group.jobGroupType}]")
326
+
327
+ def _process_models(
328
+ self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition,
329
+ model_class: tp.Optional[_api.TracModel.__class__]) \
330
+ -> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
331
+
332
+ # This processing works on the assumption that job details follow a convention for addressing models
333
+ # Jobs requiring a single model have a field called "model"
334
+ # Jobs requiring multiple models have a field called "models@, which is a dict
335
+
336
+ job_detail = self._get_job_detail(job_def)
337
+
338
+ # If a model class is supplied in code, use that to generate the model def
339
+ if model_class is not None:
340
+
341
+ # Passing a model class via launch_model() is only supported for job types with a single model
342
+ if not hasattr(job_detail, "model"):
343
+ raise _ex.EJobValidation(f"Job type [{job_def.jobType}] cannot be launched using launch_model()")
344
+
345
+ model_id, model_obj = self._generate_model_for_class(model_class)
346
+ job_detail.model = _util.selector_for(model_id)
347
+ job_config = self._add_job_resource(job_config, model_id, model_obj)
348
+
349
+ # Otherwise look for models specified as a single string, and take that as the entry point
350
+ else:
351
+
352
+ # Jobs with a single model
353
+ if hasattr(job_detail, "model") and isinstance(job_detail.model, str):
354
+ model_id, model_obj = self._generate_model_for_entry_point(job_detail.model) # noqa
355
+ job_detail.model = _util.selector_for(model_id)
356
+ job_config = self._add_job_resource(job_config, model_id, model_obj)
357
+
358
+ elif hasattr(job_detail, "model") and isinstance(job_detail.model, _meta.TagSelector):
359
+ if job_detail.model.objectType == _meta.ObjectType.OBJECT_TYPE_NOT_SET:
360
+ error = f"Missing required property [model] for job type [{job_def.jobType.name}]"
361
+ self._log.error(error)
362
+ raise _ex.EJobValidation(error)
363
+
364
+ # Jobs with multiple models
365
+ elif hasattr(job_detail, "models") and isinstance(job_detail.models, dict):
366
+ for model_key, model_detail in job_detail.models.items():
367
+ if isinstance(model_detail, str):
368
+ model_id, model_obj = self._generate_model_for_entry_point(model_detail)
369
+ job_detail.models[model_key] = _util.selector_for(model_id)
370
+ job_config = self._add_job_resource(job_config, model_id, model_obj)
371
+
372
+ return job_config, job_def
373
+
258
374
  def _generate_model_for_class(
259
- cls, model_loader: _models.ModelLoader, model_class: _api.TracModel.__class__) \
375
+ self, model_class: _api.TracModel.__class__) \
260
376
  -> (_meta.TagHeader, _meta.ObjectDefinition):
261
377
 
262
378
  model_entry_point = f"{model_class.__module__}.{model_class.__name__}"
379
+ return self._generate_model_for_entry_point(model_entry_point)
263
380
 
264
- return cls._generate_model_for_entry_point(model_loader, model_entry_point)
265
-
266
- @classmethod
267
381
  def _generate_model_for_entry_point(
268
- cls, model_loader: _models.ModelLoader, model_entry_point: str) \
382
+ self, model_entry_point: str) \
269
383
  -> (_meta.TagHeader, _meta.ObjectDefinition):
270
384
 
271
385
  model_id = _util.new_object_id(_meta.ObjectType.MODEL)
272
386
  model_key = _util.object_key(model_id)
273
387
 
274
- cls._log.info(f"Generating model definition for [{model_entry_point}] with ID = [{model_key}]")
388
+ self._log.info(f"Generating model definition for [{model_entry_point}] with ID = [{model_key}]")
275
389
 
276
390
  skeleton_modeL_def = _meta.ModelDefinition( # noqa
277
391
  language="python",
@@ -282,8 +396,8 @@ class DevModeTranslator:
282
396
  inputs={},
283
397
  outputs={})
284
398
 
285
- model_class = model_loader.load_model_class("DEV_MODE_TRANSLATION", skeleton_modeL_def)
286
- model_def = model_loader.scan_model(skeleton_modeL_def, model_class)
399
+ model_class = self._model_loader.load_model_class("DEV_MODE_TRANSLATION", skeleton_modeL_def)
400
+ model_def = self._model_loader.scan_model(skeleton_modeL_def, model_class)
287
401
 
288
402
  model_object = _meta.ObjectDefinition(
289
403
  objectType=_meta.ObjectType.MODEL,
@@ -291,56 +405,57 @@ class DevModeTranslator:
291
405
 
292
406
  return model_id, model_object
293
407
 
294
- @classmethod
295
- def _process_flow_definition(cls, job_config: _cfg.JobConfig, config_mgr: _cfg_p.ConfigManager) -> _cfg.JobConfig:
408
+ def _process_flow_definition(
409
+ self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
410
+ -> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
296
411
 
297
- flow_details = job_config.job.runFlow.flow
412
+ flow_details = job_def.runFlow.flow
298
413
 
299
414
  # Do not apply translation if flow is specified as an object ID / selector (assume full config is supplied)
300
415
  if isinstance(flow_details, _meta.TagHeader) or isinstance(flow_details, _meta.TagSelector):
301
- return job_config
416
+ return job_config, job_def
302
417
 
303
418
  # Otherwise, flow is specified as the path to dev-mode flow definition
304
419
  if not isinstance(flow_details, str):
305
420
  err = f"Invalid config value for [job.runFlow.flow]: Expected path or tag selector, got [{flow_details}])"
306
- cls._log.error(err)
421
+ self._log.error(err)
307
422
  raise _ex.EConfigParse(err)
308
423
 
309
424
  flow_id = _util.new_object_id(_meta.ObjectType.FLOW)
310
425
  flow_key = _util.object_key(flow_id)
311
426
 
312
- cls._log.info(f"Generating flow definition from [{flow_details}] with ID = [{flow_key}]")
427
+ self._log.info(f"Generating flow definition from [{flow_details}] with ID = [{flow_key}]")
313
428
 
314
- flow_def = config_mgr.load_config_object(flow_details, _meta.FlowDefinition)
429
+ flow_def = self._config_mgr.load_config_object(flow_details, _meta.FlowDefinition)
315
430
 
316
431
  # Validate models against the flow (this could move to _impl.validation and check prod jobs as well)
317
- cls._check_models_for_flow(flow_def, job_config)
432
+ self._check_models_for_flow(flow_def, job_def, job_config)
318
433
 
319
434
  # Auto-wiring and inference only applied to externally loaded flows for now
320
- flow_def = cls._autowire_flow(flow_def, job_config)
321
- flow_def = cls._apply_type_inference(flow_def, job_config)
435
+ flow_def = self._autowire_flow(flow_def, job_def, job_config)
436
+ flow_def = self._apply_type_inference(flow_def, job_def, job_config)
322
437
 
323
438
  flow_obj = _meta.ObjectDefinition(
324
439
  objectType=_meta.ObjectType.FLOW,
325
440
  flow=flow_def)
326
441
 
442
+ job_def = copy.copy(job_def)
443
+ job_def.runFlow = copy.copy(job_def.runFlow)
444
+ job_def.runFlow.flow = _util.selector_for(flow_id)
445
+
327
446
  job_config = copy.copy(job_config)
328
- job_config.job = copy.copy(job_config.job)
329
- job_config.job.runFlow = copy.copy(job_config.job.runFlow)
330
447
  job_config.resources = copy.copy(job_config.resources)
448
+ job_config = self._add_job_resource(job_config, flow_id, flow_obj)
331
449
 
332
- job_config = cls._add_job_resource(job_config, flow_id, flow_obj)
333
- job_config.job.runFlow.flow = _util.selector_for(flow_id)
334
-
335
- return job_config
450
+ return job_config, job_def
336
451
 
337
452
  @classmethod
338
- def _check_models_for_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
453
+ def _check_models_for_flow(cls, flow: _meta.FlowDefinition, job_def: _meta.JobDefinition, job_config: _cfg.JobConfig):
339
454
 
340
455
  model_nodes = dict(filter(lambda n: n[1].nodeType == _meta.FlowNodeType.MODEL_NODE, flow.nodes.items()))
341
456
 
342
- missing_models = list(filter(lambda m: m not in job_config.job.runFlow.models, model_nodes.keys()))
343
- extra_models = list(filter(lambda m: m not in model_nodes, job_config.job.runFlow.models.keys()))
457
+ missing_models = list(filter(lambda m: m not in job_def.runFlow.models, model_nodes.keys()))
458
+ extra_models = list(filter(lambda m: m not in model_nodes, job_def.runFlow.models.keys()))
344
459
 
345
460
  if any(missing_models):
346
461
  error = f"Missing models in job definition: {', '.join(missing_models)}"
@@ -354,7 +469,7 @@ class DevModeTranslator:
354
469
 
355
470
  for model_name, model_node in model_nodes.items():
356
471
 
357
- model_selector = job_config.job.runFlow.models[model_name]
472
+ model_selector = job_def.runFlow.models[model_name]
358
473
  model_obj = _util.get_job_resource(model_selector, job_config)
359
474
 
360
475
  model_inputs = set(model_obj.model.inputs.keys())
@@ -366,9 +481,9 @@ class DevModeTranslator:
366
481
  raise _ex.EJobValidation(error)
367
482
 
368
483
  @classmethod
369
- def _autowire_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
484
+ def _autowire_flow(cls, flow: _meta.FlowDefinition, job_def: _meta.JobDefinition, job_config: _cfg.JobConfig):
370
485
 
371
- job = job_config.job.runFlow
486
+ job = job_def.runFlow
372
487
  nodes = copy.copy(flow.nodes)
373
488
  edges: tp.Dict[str, _meta.FlowEdge] = dict()
374
489
 
@@ -455,7 +570,10 @@ class DevModeTranslator:
455
570
  return autowired_flow
456
571
 
457
572
  @classmethod
458
- def _apply_type_inference(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig) -> _meta.FlowDefinition:
573
+ def _apply_type_inference(
574
+ cls, flow: _meta.FlowDefinition,
575
+ job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
576
+ -> _meta.FlowDefinition:
459
577
 
460
578
  updated_flow = copy.copy(flow)
461
579
  updated_flow.parameters = copy.copy(flow.parameters)
@@ -476,17 +594,17 @@ class DevModeTranslator:
476
594
 
477
595
  if node.nodeType == _meta.FlowNodeType.PARAMETER_NODE and node_name not in flow.parameters:
478
596
  targets = edges_by_source.get(node_name) or []
479
- model_parameter = cls._infer_parameter(node_name, targets, job_config)
597
+ model_parameter = cls._infer_parameter(node_name, targets, job_def, job_config)
480
598
  updated_flow.parameters[node_name] = model_parameter
481
599
 
482
600
  if node.nodeType == _meta.FlowNodeType.INPUT_NODE and node_name not in flow.inputs:
483
601
  targets = edges_by_source.get(node_name) or []
484
- model_input = cls._infer_input_schema(node_name, targets, job_config)
602
+ model_input = cls._infer_input_schema(node_name, targets, job_def, job_config)
485
603
  updated_flow.inputs[node_name] = model_input
486
604
 
487
605
  if node.nodeType == _meta.FlowNodeType.OUTPUT_NODE and node_name not in flow.outputs:
488
606
  sources = edges_by_target.get(node_name) or []
489
- model_output = cls._infer_output_schema(node_name, sources, job_config)
607
+ model_output = cls._infer_output_schema(node_name, sources, job_def, job_config)
490
608
  updated_flow.outputs[node_name] = model_output
491
609
 
492
610
  return updated_flow
@@ -494,13 +612,14 @@ class DevModeTranslator:
494
612
  @classmethod
495
613
  def _infer_parameter(
496
614
  cls, param_name: str, targets: tp.List[_meta.FlowSocket],
497
- job_config: _cfg.JobConfig) -> _meta.ModelParameter:
615
+ job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
616
+ -> _meta.ModelParameter:
498
617
 
499
618
  model_params = []
500
619
 
501
620
  for target in targets:
502
621
 
503
- model_selector = job_config.job.runFlow.models.get(target.node)
622
+ model_selector = job_def.runFlow.models.get(target.node)
504
623
  model_obj = _util.get_job_resource(model_selector, job_config)
505
624
  model_param = model_obj.model.parameters.get(target.socket)
506
625
  model_params.append(model_param)
@@ -530,13 +649,14 @@ class DevModeTranslator:
530
649
  @classmethod
531
650
  def _infer_input_schema(
532
651
  cls, input_name: str, targets: tp.List[_meta.FlowSocket],
533
- job_config: _cfg.JobConfig) -> _meta.ModelInputSchema:
652
+ job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
653
+ -> _meta.ModelInputSchema:
534
654
 
535
655
  model_inputs = []
536
656
 
537
657
  for target in targets:
538
658
 
539
- model_selector = job_config.job.runFlow.models.get(target.node)
659
+ model_selector = job_def.runFlow.models.get(target.node)
540
660
  model_obj = _util.get_job_resource(model_selector, job_config)
541
661
  model_input = model_obj.model.inputs.get(target.socket)
542
662
  model_inputs.append(model_input)
@@ -564,13 +684,14 @@ class DevModeTranslator:
564
684
  @classmethod
565
685
  def _infer_output_schema(
566
686
  cls, output_name: str, sources: tp.List[_meta.FlowSocket],
567
- job_config: _cfg.JobConfig) -> _meta.ModelOutputSchema:
687
+ job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
688
+ -> _meta.ModelOutputSchema:
568
689
 
569
690
  model_outputs = []
570
691
 
571
692
  for source in sources:
572
693
 
573
- model_selector = job_config.job.runFlow.models.get(source.node)
694
+ model_selector = job_def.runFlow.models.get(source.node)
574
695
  model_obj = _util.get_job_resource(model_selector, job_config)
575
696
  model_input = model_obj.model.inputs.get(source.socket)
576
697
  model_outputs.append(model_input)
@@ -594,37 +715,38 @@ class DevModeTranslator:
594
715
  return f"{socket.node}.{socket.socket}" if socket.socket else socket.node
595
716
 
596
717
  @classmethod
597
- def _process_parameters(cls, job_config: _cfg.JobConfig) -> _cfg.JobConfig:
718
+ def _process_parameters(
719
+ cls, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
720
+ -> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
598
721
 
599
- if job_config.job.jobType == _meta.JobType.RUN_MODEL:
600
-
601
- job_details = job_config.job.runModel
602
- model_key = _util.object_key(job_details.model)
603
- model_or_flow = job_config.resources[model_key].model
722
+ # This relies on convention for naming properties across similar job types
604
723
 
605
- elif job_config.job.jobType == _meta.JobType.RUN_FLOW:
724
+ job_detail = cls._get_job_detail(job_def)
606
725
 
607
- job_details = job_config.job.runFlow
608
- flow_key = _util.object_key(job_details.flow)
726
+ if hasattr(job_detail, "model"):
727
+ model_key = _util.object_key(job_detail.model)
728
+ model_or_flow = job_config.resources[model_key].model
729
+ elif hasattr(job_detail, "flow"):
730
+ flow_key = _util.object_key(job_detail.flow)
609
731
  model_or_flow = job_config.resources[flow_key].flow
610
-
611
732
  else:
612
- raise _ex.EUnexpected()
733
+ model_or_flow = None
613
734
 
614
- param_specs = model_or_flow.parameters
615
- param_values = job_details.parameters
735
+ if model_or_flow is not None:
616
736
 
617
- # Set encoded params on runModel or runFlow depending on the job type
618
- job_details.parameters = cls._process_parameters_dict(param_specs, param_values)
737
+ param_specs = model_or_flow.parameters
738
+ raw_values = job_detail.parameters
619
739
 
620
- return job_config
740
+ job_detail.parameters = cls._process_parameters_dict(param_specs, raw_values)
741
+
742
+ return job_config, job_def
621
743
 
622
744
  @classmethod
623
745
  def _process_parameters_dict(
624
746
  cls, param_specs: tp.Dict[str, _meta.ModelParameter],
625
- param_values: tp.Dict[str, _meta.Value]) -> tp.Dict[str, _meta.Value]:
747
+ raw_values: tp.Dict[str, _meta.Value]) -> tp.Dict[str, _meta.Value]:
626
748
 
627
- unknown_params = list(filter(lambda p: p not in param_specs, param_values))
749
+ unknown_params = list(filter(lambda p: p not in param_specs, raw_values))
628
750
 
629
751
  if any(unknown_params):
630
752
  msg = f"Unknown parameters cannot be translated: [{', '.join(unknown_params)}]"
@@ -633,7 +755,7 @@ class DevModeTranslator:
633
755
 
634
756
  encoded_values = dict()
635
757
 
636
- for p_name, p_value in param_values.items():
758
+ for p_name, p_value in raw_values.items():
637
759
 
638
760
  if isinstance(p_value, _meta.Value):
639
761
  encoded_values[p_name] = p_value
@@ -648,26 +770,27 @@ class DevModeTranslator:
648
770
 
649
771
  return encoded_values
650
772
 
651
- @classmethod
652
- def _process_inputs_and_outputs(cls, sys_config: _cfg.RuntimeConfig, job_config: _cfg.JobConfig) -> _cfg.JobConfig:
773
+ def _process_inputs_and_outputs(
774
+ self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
775
+ -> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
776
+
777
+ job_detail = self._get_job_detail(job_def)
653
778
 
654
- if job_config.job.jobType == _meta.JobType.RUN_MODEL:
655
- job_details = job_config.job.runModel
656
- model_obj = _util.get_job_resource(job_details.model, job_config)
779
+ if hasattr(job_detail, "model"):
780
+ model_obj = _util.get_job_resource(job_detail.model, job_config)
657
781
  required_inputs = model_obj.model.inputs
658
782
  required_outputs = model_obj.model.outputs
659
783
 
660
- elif job_config.job.jobType == _meta.JobType.RUN_FLOW:
661
- job_details = job_config.job.runFlow
662
- flow_obj = _util.get_job_resource(job_details.flow, job_config)
784
+ elif hasattr(job_detail, "flow"):
785
+ flow_obj = _util.get_job_resource(job_detail.flow, job_config)
663
786
  required_inputs = flow_obj.flow.inputs
664
787
  required_outputs = flow_obj.flow.outputs
665
788
 
666
789
  else:
667
- return job_config
790
+ return job_config, job_def
668
791
 
669
- job_inputs = job_details.inputs
670
- job_outputs = job_details.outputs
792
+ job_inputs = job_detail.inputs
793
+ job_outputs = job_detail.outputs
671
794
  job_resources = job_config.resources
672
795
 
673
796
  for input_key, input_value in job_inputs.items():
@@ -676,8 +799,8 @@ class DevModeTranslator:
676
799
  model_input = required_inputs[input_key]
677
800
  input_schema = model_input.schema if model_input and not model_input.dynamic else None
678
801
 
679
- input_id = cls._process_input_or_output(
680
- sys_config, input_key, input_value, job_resources,
802
+ input_id = self._process_input_or_output(
803
+ input_key, input_value, job_resources,
681
804
  new_unique_file=False, schema=input_schema)
682
805
 
683
806
  job_inputs[input_key] = _util.selector_for(input_id)
@@ -688,17 +811,16 @@ class DevModeTranslator:
688
811
  model_output= required_outputs[output_key]
689
812
  output_schema = model_output.schema if model_output and not model_output.dynamic else None
690
813
 
691
- output_id = cls._process_input_or_output(
692
- sys_config, output_key, output_value, job_resources,
814
+ output_id = self._process_input_or_output(
815
+ output_key, output_value, job_resources,
693
816
  new_unique_file=True, schema=output_schema)
694
817
 
695
818
  job_outputs[output_key] = _util.selector_for(output_id)
696
819
 
697
- return job_config
820
+ return job_config, job_def
698
821
 
699
- @classmethod
700
822
  def _process_input_or_output(
701
- cls, sys_config, data_key, data_value,
823
+ self, data_key, data_value,
702
824
  resources: tp.Dict[str, _meta.ObjectDefinition],
703
825
  new_unique_file=False,
704
826
  schema: tp.Optional[_meta.SchemaDefinition] = None) \
@@ -709,8 +831,8 @@ class DevModeTranslator:
709
831
 
710
832
  if isinstance(data_value, str):
711
833
  storage_path = data_value
712
- storage_key = sys_config.storage.defaultBucket
713
- storage_format = cls.infer_format(storage_path, sys_config.storage)
834
+ storage_key = self._sys_config.storage.defaultBucket
835
+ storage_format = self.infer_format(storage_path, self._sys_config.storage)
714
836
  snap_version = 1
715
837
 
716
838
  elif isinstance(data_value, dict):
@@ -720,14 +842,14 @@ class DevModeTranslator:
720
842
  if not storage_path:
721
843
  raise _ex.EConfigParse(f"Invalid configuration for input [{data_key}] (missing required value 'path'")
722
844
 
723
- storage_key = data_value.get("storageKey") or sys_config.storage.defaultBucket
724
- storage_format = data_value.get("format") or cls.infer_format(storage_path, sys_config.storage)
845
+ storage_key = data_value.get("storageKey") or self._sys_config.storage.defaultBucket
846
+ storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config.storage)
725
847
  snap_version = 1
726
848
 
727
849
  else:
728
850
  raise _ex.EConfigParse(f"Invalid configuration for input '{data_key}'")
729
851
 
730
- cls._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
852
+ self._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
731
853
 
732
854
  # For unique outputs, increment the snap number to find a new unique snap
733
855
  # These are not incarnations, bc likely in dev mode model code and inputs are changing
@@ -735,7 +857,7 @@ class DevModeTranslator:
735
857
 
736
858
  if new_unique_file:
737
859
 
738
- x_storage_mgr = _storage.StorageManager(sys_config)
860
+ x_storage_mgr = _storage.StorageManager(self._sys_config)
739
861
  x_storage = x_storage_mgr.get_file_storage(storage_key)
740
862
  x_orig_path = pathlib.PurePath(storage_path)
741
863
  x_name = x_orig_path.name
@@ -752,9 +874,9 @@ class DevModeTranslator:
752
874
  x_name = f"{x_orig_path.stem}-{snap_version}"
753
875
  storage_path = str(x_orig_path.parent.joinpath(x_name))
754
876
 
755
- cls._log.info(f"Output for [{data_key}] will be snap version {snap_version}")
877
+ self._log.info(f"Output for [{data_key}] will be snap version {snap_version}")
756
878
 
757
- data_obj, storage_obj = cls._generate_input_definition(
879
+ data_obj, storage_obj = self._generate_input_definition(
758
880
  data_id, storage_id, storage_key, storage_path, storage_format,
759
881
  snap_index=snap_version, delta_index=1, incarnation_index=1,
760
882
  schema=schema)