tracdap-runtime 0.6.5__py3-none-any.whl → 0.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_exec/context.py +272 -105
- tracdap/rt/_exec/dev_mode.py +231 -138
- tracdap/rt/_exec/engine.py +217 -59
- tracdap/rt/_exec/functions.py +25 -1
- tracdap/rt/_exec/graph.py +9 -0
- tracdap/rt/_exec/graph_builder.py +295 -198
- tracdap/rt/_exec/runtime.py +7 -5
- tracdap/rt/_impl/config_parser.py +11 -4
- tracdap/rt/_impl/data.py +278 -167
- tracdap/rt/_impl/ext/__init__.py +13 -0
- tracdap/rt/_impl/ext/sql.py +116 -0
- tracdap/rt/_impl/ext/storage.py +57 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +62 -54
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +37 -2
- tracdap/rt/_impl/static_api.py +24 -11
- tracdap/rt/_impl/storage.py +2 -2
- tracdap/rt/_impl/util.py +10 -0
- tracdap/rt/_impl/validation.py +66 -13
- tracdap/rt/_plugins/storage_sql.py +417 -0
- tracdap/rt/_plugins/storage_sql_dialects.py +117 -0
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/experimental.py +79 -32
- tracdap/rt/api/hook.py +10 -0
- tracdap/rt/metadata/__init__.py +4 -0
- tracdap/rt/metadata/job.py +45 -0
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/METADATA +3 -1
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/RECORD +30 -25
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.6.6.dist-info}/top_level.txt +0 -0
tracdap/rt/_exec/dev_mode.py
CHANGED
@@ -34,7 +34,15 @@ DEV_MODE_JOB_CONFIG = [
|
|
34
34
|
re.compile(r"job\.\w+\.outputs\.\w+"),
|
35
35
|
re.compile(r"job\.\w+\.models\.\w+"),
|
36
36
|
re.compile(r"job\.\w+\.model"),
|
37
|
-
re.compile(r"job\.\w+\.flow")
|
37
|
+
re.compile(r"job\.\w+\.flow"),
|
38
|
+
|
39
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.parameters\.\w+"),
|
40
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.inputs\.\w+"),
|
41
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.outputs\.\w+"),
|
42
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.models\.\w+"),
|
43
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.model"),
|
44
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.flow")
|
45
|
+
]
|
38
46
|
|
39
47
|
DEV_MODE_SYS_CONFIG = []
|
40
48
|
|
@@ -58,38 +66,6 @@ class DevModeTranslator:
|
|
58
66
|
|
59
67
|
return sys_config
|
60
68
|
|
61
|
-
@classmethod
|
62
|
-
def translate_job_config(
|
63
|
-
cls,
|
64
|
-
sys_config: _cfg.RuntimeConfig,
|
65
|
-
job_config: _cfg.JobConfig,
|
66
|
-
scratch_dir: pathlib.Path,
|
67
|
-
config_mgr: _cfg_p.ConfigManager,
|
68
|
-
model_class: tp.Optional[_api.TracModel.__class__]) \
|
69
|
-
-> _cfg.JobConfig:
|
70
|
-
|
71
|
-
cls._log.info(f"Applying dev mode config translation to job config")
|
72
|
-
|
73
|
-
# Protobuf semantics for a blank jobId should be an object, but objectId will be an empty string
|
74
|
-
if not job_config.jobId or not job_config.jobId.objectId:
|
75
|
-
job_config = cls._process_job_id(job_config)
|
76
|
-
|
77
|
-
if job_config.job.jobType is None or job_config.job.jobType == _meta.JobType.JOB_TYPE_NOT_SET:
|
78
|
-
job_config = cls._process_job_type(job_config)
|
79
|
-
|
80
|
-
# Load and populate any models provided as a Python class or class name
|
81
|
-
job_config = cls._process_models(sys_config, job_config, scratch_dir, model_class)
|
82
|
-
|
83
|
-
# Fow flows, load external flow definitions then perform auto-wiring and type inference
|
84
|
-
if job_config.job.jobType == _meta.JobType.RUN_FLOW:
|
85
|
-
job_config = cls._process_flow_definition(job_config, config_mgr)
|
86
|
-
|
87
|
-
# Apply processing to the parameters, inputs and outputs
|
88
|
-
job_config = cls._process_parameters(job_config)
|
89
|
-
job_config = cls._process_inputs_and_outputs(sys_config, job_config)
|
90
|
-
|
91
|
-
return job_config
|
92
|
-
|
93
69
|
@classmethod
|
94
70
|
def _add_integrated_repo(cls, sys_config: _cfg.RuntimeConfig) -> _cfg.RuntimeConfig:
|
95
71
|
|
@@ -159,6 +135,86 @@ class DevModeTranslator:
|
|
159
135
|
cls._log.error(msg)
|
160
136
|
raise _ex.EConfigParse(msg)
|
161
137
|
|
138
|
+
|
139
|
+
def __init__(self, sys_config: _cfg.RuntimeConfig, config_mgr: _cfg_p.ConfigManager, scratch_dir: pathlib.Path):
|
140
|
+
self._sys_config = sys_config
|
141
|
+
self._config_mgr = config_mgr
|
142
|
+
self._scratch_dir = scratch_dir
|
143
|
+
self._model_loader: tp.Optional[_models.ModelLoader] = None
|
144
|
+
|
145
|
+
def translate_job_config(
|
146
|
+
self, job_config: _cfg.JobConfig,
|
147
|
+
model_class: tp.Optional[_api.TracModel.__class__] = None) \
|
148
|
+
-> _cfg.JobConfig:
|
149
|
+
|
150
|
+
try:
|
151
|
+
self._log.info(f"Applying dev mode config translation to job config")
|
152
|
+
|
153
|
+
self._model_loader = _models.ModelLoader(self._sys_config, self._scratch_dir)
|
154
|
+
self._model_loader.create_scope("DEV_MODE_TRANSLATION")
|
155
|
+
|
156
|
+
job_config = copy.deepcopy(job_config)
|
157
|
+
job_def = job_config.job
|
158
|
+
|
159
|
+
# Protobuf semantics for a blank jobId should be an object, but objectId will be an empty string
|
160
|
+
if not job_config.jobId or not job_config.jobId.objectId:
|
161
|
+
job_config = self._process_job_id(job_config)
|
162
|
+
|
163
|
+
job_config, job_def = self.translate_job_def(job_config, job_def, model_class)
|
164
|
+
job_config.job = job_def
|
165
|
+
|
166
|
+
return job_config
|
167
|
+
|
168
|
+
finally:
|
169
|
+
self._model_loader.destroy_scope("DEV_MODE_TRANSLATION")
|
170
|
+
self._model_loader = None
|
171
|
+
|
172
|
+
def translate_job_def(
|
173
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition,
|
174
|
+
model_class: tp.Optional[_api.TracModel.__class__] = None) \
|
175
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
176
|
+
|
177
|
+
if job_def.jobType is None or job_def.jobType == _meta.JobType.JOB_TYPE_NOT_SET:
|
178
|
+
job_def = self._process_job_type(job_def)
|
179
|
+
|
180
|
+
# Load and populate any models provided as a Python class or class name
|
181
|
+
job_config, job_def = self._process_models(job_config, job_def, model_class)
|
182
|
+
|
183
|
+
# Fow flows, load external flow definitions then perform auto-wiring and type inference
|
184
|
+
if job_def.jobType == _meta.JobType.RUN_FLOW:
|
185
|
+
job_config, job_def = self._process_flow_definition(job_config, job_def)
|
186
|
+
|
187
|
+
if job_def.jobType == _meta.JobType.JOB_GROUP:
|
188
|
+
job_config, job_def = self.translate_job_group(job_config, job_def)
|
189
|
+
|
190
|
+
# Apply processing to the parameters, inputs and outputs
|
191
|
+
job_config, job_def = self._process_parameters(job_config, job_def)
|
192
|
+
job_config, job_def = self._process_inputs_and_outputs(job_config, job_def)
|
193
|
+
|
194
|
+
return job_config, job_def
|
195
|
+
|
196
|
+
def translate_job_group(
|
197
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
|
198
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
199
|
+
|
200
|
+
job_group = job_def.jobGroup
|
201
|
+
|
202
|
+
if job_group.jobGroupType is None or job_group.jobGroupType == _meta.JobGroupType.JOB_GROUP_TYPE_NOT_SET:
|
203
|
+
job_group = self._process_job_group_type(job_group)
|
204
|
+
|
205
|
+
group_details = self._get_job_group_detail(job_group)
|
206
|
+
|
207
|
+
if hasattr(group_details, "jobs"):
|
208
|
+
child_jobs = []
|
209
|
+
for child_def in group_details.jobs:
|
210
|
+
job_config, child_def = self.translate_job_def(job_config, child_def)
|
211
|
+
child_jobs.append(child_def)
|
212
|
+
group_details.jobs = child_jobs
|
213
|
+
|
214
|
+
job_def.jobGroup = job_group
|
215
|
+
|
216
|
+
return job_config, job_def
|
217
|
+
|
162
218
|
@classmethod
|
163
219
|
def _add_job_resource(
|
164
220
|
cls, job_config: _cfg.JobConfig,
|
@@ -183,125 +239,153 @@ class DevModeTranslator:
|
|
183
239
|
return translated_config
|
184
240
|
|
185
241
|
@classmethod
|
186
|
-
def _process_job_type(cls,
|
242
|
+
def _process_job_type(cls, job_def: _meta.JobDefinition):
|
187
243
|
|
188
|
-
if
|
244
|
+
if job_def.runModel is not None:
|
189
245
|
job_type = _meta.JobType.RUN_MODEL
|
190
246
|
|
191
|
-
elif
|
247
|
+
elif job_def.runFlow is not None:
|
192
248
|
job_type = _meta.JobType.RUN_FLOW
|
193
249
|
|
194
|
-
elif
|
250
|
+
elif job_def.importModel is not None:
|
195
251
|
job_type = _meta.JobType.IMPORT_MODEL
|
196
252
|
|
197
|
-
elif
|
253
|
+
elif job_def.importData is not None:
|
198
254
|
job_type = _meta.JobType.IMPORT_DATA
|
199
255
|
|
200
|
-
elif
|
256
|
+
elif job_def.exportData is not None:
|
201
257
|
job_type = _meta.JobType.EXPORT_DATA
|
202
258
|
|
259
|
+
elif job_def.jobGroup is not None:
|
260
|
+
job_type = _meta.JobType.JOB_GROUP
|
261
|
+
|
203
262
|
else:
|
204
263
|
cls._log.error("Could not infer job type")
|
205
264
|
raise _ex.EConfigParse("Could not infer job type")
|
206
265
|
|
207
266
|
cls._log.info(f"Inferred job type = [{job_type.name}]")
|
208
267
|
|
209
|
-
job_def = copy.copy(
|
268
|
+
job_def = copy.copy(job_def)
|
210
269
|
job_def.jobType = job_type
|
211
270
|
|
212
|
-
|
213
|
-
job_config.job = job_def
|
271
|
+
return job_def
|
214
272
|
|
215
|
-
|
273
|
+
@classmethod
|
274
|
+
def _process_job_group_type(cls, job_group: _meta.JobGroup) -> _meta.JobGroup:
|
275
|
+
|
276
|
+
if job_group.sequential is not None:
|
277
|
+
job_group_type = _meta.JobGroupType.SEQUENTIAL_JOB_GROUP
|
278
|
+
|
279
|
+
elif job_group.parallel is not None:
|
280
|
+
job_group_type = _meta.JobGroupType.PARALLEL_JOB_GROUP
|
281
|
+
|
282
|
+
else:
|
283
|
+
cls._log.error("Could not infer job group type")
|
284
|
+
raise _ex.EConfigParse("Could not infer job group type")
|
285
|
+
|
286
|
+
cls._log.info(f"Inferred job group type = [{job_group_type.name}]")
|
287
|
+
|
288
|
+
job_group = copy.copy(job_group)
|
289
|
+
job_group.jobGroupType = job_group_type
|
290
|
+
|
291
|
+
return job_group
|
216
292
|
|
217
293
|
@classmethod
|
218
|
-
def _get_job_detail(cls,
|
294
|
+
def _get_job_detail(cls, job_def: _meta.JobDefinition):
|
295
|
+
|
296
|
+
if job_def.jobType == _meta.JobType.RUN_MODEL:
|
297
|
+
return job_def.runModel
|
219
298
|
|
220
|
-
if
|
221
|
-
return
|
299
|
+
if job_def.jobType == _meta.JobType.RUN_FLOW:
|
300
|
+
return job_def.runFlow
|
222
301
|
|
223
|
-
if
|
224
|
-
return
|
302
|
+
if job_def.jobType == _meta.JobType.IMPORT_MODEL:
|
303
|
+
return job_def.importModel
|
225
304
|
|
226
|
-
if
|
227
|
-
return
|
305
|
+
if job_def.jobType == _meta.JobType.IMPORT_DATA:
|
306
|
+
return job_def.importData
|
228
307
|
|
229
|
-
if
|
230
|
-
return
|
308
|
+
if job_def.jobType == _meta.JobType.EXPORT_DATA:
|
309
|
+
return job_def.exportData
|
231
310
|
|
232
|
-
if
|
233
|
-
return
|
311
|
+
if job_def.jobType == _meta.JobType.JOB_GROUP:
|
312
|
+
return job_def.jobGroup
|
234
313
|
|
235
|
-
raise _ex.EConfigParse(f"Could not get job details for job type [{
|
314
|
+
raise _ex.EConfigParse(f"Could not get job details for job type [{job_def.jobType}]")
|
236
315
|
|
237
316
|
@classmethod
|
317
|
+
def _get_job_group_detail(cls, job_group: _meta.JobGroup):
|
318
|
+
|
319
|
+
if job_group.jobGroupType == _meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
|
320
|
+
return job_group.sequential
|
321
|
+
|
322
|
+
if job_group.jobGroupType == _meta.JobGroupType.PARALLEL_JOB_GROUP:
|
323
|
+
return job_group.parallel
|
324
|
+
|
325
|
+
raise _ex.EConfigParse(f"Could not get job group details for group type [{job_group.jobGroupType}]")
|
326
|
+
|
238
327
|
def _process_models(
|
239
|
-
|
240
|
-
sys_config: _cfg.RuntimeConfig,
|
241
|
-
job_config: _cfg.JobConfig,
|
242
|
-
scratch_dir: pathlib.Path,
|
328
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition,
|
243
329
|
model_class: tp.Optional[_api.TracModel.__class__]) \
|
244
|
-
-> _cfg.JobConfig:
|
245
|
-
|
246
|
-
model_loader = _models.ModelLoader(sys_config, scratch_dir)
|
247
|
-
model_loader.create_scope("DEV_MODE_TRANSLATION")
|
330
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
248
331
|
|
249
332
|
# This processing works on the assumption that job details follow a convention for addressing models
|
250
333
|
# Jobs requiring a single model have a field called "model"
|
251
334
|
# Jobs requiring multiple models have a field called "models@, which is a dict
|
252
335
|
|
253
|
-
job_detail =
|
336
|
+
job_detail = self._get_job_detail(job_def)
|
254
337
|
|
255
338
|
# If a model class is supplied in code, use that to generate the model def
|
256
339
|
if model_class is not None:
|
257
340
|
|
258
341
|
# Passing a model class via launch_model() is only supported for job types with a single model
|
259
342
|
if not hasattr(job_detail, "model"):
|
260
|
-
raise _ex.EJobValidation(f"Job type [{
|
343
|
+
raise _ex.EJobValidation(f"Job type [{job_def.jobType}] cannot be launched using launch_model()")
|
261
344
|
|
262
|
-
model_id, model_obj =
|
345
|
+
model_id, model_obj = self._generate_model_for_class(model_class)
|
263
346
|
job_detail.model = _util.selector_for(model_id)
|
264
|
-
job_config =
|
347
|
+
job_config = self._add_job_resource(job_config, model_id, model_obj)
|
265
348
|
|
266
349
|
# Otherwise look for models specified as a single string, and take that as the entry point
|
267
350
|
else:
|
268
351
|
|
269
352
|
# Jobs with a single model
|
270
353
|
if hasattr(job_detail, "model") and isinstance(job_detail.model, str):
|
271
|
-
model_id, model_obj =
|
354
|
+
model_id, model_obj = self._generate_model_for_entry_point(job_detail.model) # noqa
|
272
355
|
job_detail.model = _util.selector_for(model_id)
|
273
|
-
job_config =
|
356
|
+
job_config = self._add_job_resource(job_config, model_id, model_obj)
|
274
357
|
|
275
|
-
|
358
|
+
elif hasattr(job_detail, "model") and isinstance(job_detail.model, _meta.TagSelector):
|
359
|
+
if job_detail.model.objectType == _meta.ObjectType.OBJECT_TYPE_NOT_SET:
|
360
|
+
error = f"Missing required property [model] for job type [{job_def.jobType.name}]"
|
361
|
+
self._log.error(error)
|
362
|
+
raise _ex.EJobValidation(error)
|
363
|
+
|
364
|
+
# Jobs with multiple models
|
276
365
|
elif hasattr(job_detail, "models") and isinstance(job_detail.models, dict):
|
277
366
|
for model_key, model_detail in job_detail.models.items():
|
278
367
|
if isinstance(model_detail, str):
|
279
|
-
model_id, model_obj =
|
368
|
+
model_id, model_obj = self._generate_model_for_entry_point(model_detail)
|
280
369
|
job_detail.models[model_key] = _util.selector_for(model_id)
|
281
|
-
job_config =
|
282
|
-
|
283
|
-
model_loader.destroy_scope("DEV_MODE_TRANSLATION")
|
370
|
+
job_config = self._add_job_resource(job_config, model_id, model_obj)
|
284
371
|
|
285
|
-
return job_config
|
372
|
+
return job_config, job_def
|
286
373
|
|
287
|
-
@classmethod
|
288
374
|
def _generate_model_for_class(
|
289
|
-
|
375
|
+
self, model_class: _api.TracModel.__class__) \
|
290
376
|
-> (_meta.TagHeader, _meta.ObjectDefinition):
|
291
377
|
|
292
378
|
model_entry_point = f"{model_class.__module__}.{model_class.__name__}"
|
379
|
+
return self._generate_model_for_entry_point(model_entry_point)
|
293
380
|
|
294
|
-
return cls._generate_model_for_entry_point(model_loader, model_entry_point)
|
295
|
-
|
296
|
-
@classmethod
|
297
381
|
def _generate_model_for_entry_point(
|
298
|
-
|
382
|
+
self, model_entry_point: str) \
|
299
383
|
-> (_meta.TagHeader, _meta.ObjectDefinition):
|
300
384
|
|
301
385
|
model_id = _util.new_object_id(_meta.ObjectType.MODEL)
|
302
386
|
model_key = _util.object_key(model_id)
|
303
387
|
|
304
|
-
|
388
|
+
self._log.info(f"Generating model definition for [{model_entry_point}] with ID = [{model_key}]")
|
305
389
|
|
306
390
|
skeleton_modeL_def = _meta.ModelDefinition( # noqa
|
307
391
|
language="python",
|
@@ -312,8 +396,8 @@ class DevModeTranslator:
|
|
312
396
|
inputs={},
|
313
397
|
outputs={})
|
314
398
|
|
315
|
-
model_class =
|
316
|
-
model_def =
|
399
|
+
model_class = self._model_loader.load_model_class("DEV_MODE_TRANSLATION", skeleton_modeL_def)
|
400
|
+
model_def = self._model_loader.scan_model(skeleton_modeL_def, model_class)
|
317
401
|
|
318
402
|
model_object = _meta.ObjectDefinition(
|
319
403
|
objectType=_meta.ObjectType.MODEL,
|
@@ -321,56 +405,57 @@ class DevModeTranslator:
|
|
321
405
|
|
322
406
|
return model_id, model_object
|
323
407
|
|
324
|
-
|
325
|
-
|
408
|
+
def _process_flow_definition(
|
409
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
|
410
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
326
411
|
|
327
|
-
flow_details =
|
412
|
+
flow_details = job_def.runFlow.flow
|
328
413
|
|
329
414
|
# Do not apply translation if flow is specified as an object ID / selector (assume full config is supplied)
|
330
415
|
if isinstance(flow_details, _meta.TagHeader) or isinstance(flow_details, _meta.TagSelector):
|
331
|
-
return job_config
|
416
|
+
return job_config, job_def
|
332
417
|
|
333
418
|
# Otherwise, flow is specified as the path to dev-mode flow definition
|
334
419
|
if not isinstance(flow_details, str):
|
335
420
|
err = f"Invalid config value for [job.runFlow.flow]: Expected path or tag selector, got [{flow_details}])"
|
336
|
-
|
421
|
+
self._log.error(err)
|
337
422
|
raise _ex.EConfigParse(err)
|
338
423
|
|
339
424
|
flow_id = _util.new_object_id(_meta.ObjectType.FLOW)
|
340
425
|
flow_key = _util.object_key(flow_id)
|
341
426
|
|
342
|
-
|
427
|
+
self._log.info(f"Generating flow definition from [{flow_details}] with ID = [{flow_key}]")
|
343
428
|
|
344
|
-
flow_def =
|
429
|
+
flow_def = self._config_mgr.load_config_object(flow_details, _meta.FlowDefinition)
|
345
430
|
|
346
431
|
# Validate models against the flow (this could move to _impl.validation and check prod jobs as well)
|
347
|
-
|
432
|
+
self._check_models_for_flow(flow_def, job_def, job_config)
|
348
433
|
|
349
434
|
# Auto-wiring and inference only applied to externally loaded flows for now
|
350
|
-
flow_def =
|
351
|
-
flow_def =
|
435
|
+
flow_def = self._autowire_flow(flow_def, job_def, job_config)
|
436
|
+
flow_def = self._apply_type_inference(flow_def, job_def, job_config)
|
352
437
|
|
353
438
|
flow_obj = _meta.ObjectDefinition(
|
354
439
|
objectType=_meta.ObjectType.FLOW,
|
355
440
|
flow=flow_def)
|
356
441
|
|
442
|
+
job_def = copy.copy(job_def)
|
443
|
+
job_def.runFlow = copy.copy(job_def.runFlow)
|
444
|
+
job_def.runFlow.flow = _util.selector_for(flow_id)
|
445
|
+
|
357
446
|
job_config = copy.copy(job_config)
|
358
|
-
job_config.job = copy.copy(job_config.job)
|
359
|
-
job_config.job.runFlow = copy.copy(job_config.job.runFlow)
|
360
447
|
job_config.resources = copy.copy(job_config.resources)
|
448
|
+
job_config = self._add_job_resource(job_config, flow_id, flow_obj)
|
361
449
|
|
362
|
-
|
363
|
-
job_config.job.runFlow.flow = _util.selector_for(flow_id)
|
364
|
-
|
365
|
-
return job_config
|
450
|
+
return job_config, job_def
|
366
451
|
|
367
452
|
@classmethod
|
368
|
-
def _check_models_for_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
|
453
|
+
def _check_models_for_flow(cls, flow: _meta.FlowDefinition, job_def: _meta.JobDefinition, job_config: _cfg.JobConfig):
|
369
454
|
|
370
455
|
model_nodes = dict(filter(lambda n: n[1].nodeType == _meta.FlowNodeType.MODEL_NODE, flow.nodes.items()))
|
371
456
|
|
372
|
-
missing_models = list(filter(lambda m: m not in
|
373
|
-
extra_models = list(filter(lambda m: m not in model_nodes,
|
457
|
+
missing_models = list(filter(lambda m: m not in job_def.runFlow.models, model_nodes.keys()))
|
458
|
+
extra_models = list(filter(lambda m: m not in model_nodes, job_def.runFlow.models.keys()))
|
374
459
|
|
375
460
|
if any(missing_models):
|
376
461
|
error = f"Missing models in job definition: {', '.join(missing_models)}"
|
@@ -384,7 +469,7 @@ class DevModeTranslator:
|
|
384
469
|
|
385
470
|
for model_name, model_node in model_nodes.items():
|
386
471
|
|
387
|
-
model_selector =
|
472
|
+
model_selector = job_def.runFlow.models[model_name]
|
388
473
|
model_obj = _util.get_job_resource(model_selector, job_config)
|
389
474
|
|
390
475
|
model_inputs = set(model_obj.model.inputs.keys())
|
@@ -396,9 +481,9 @@ class DevModeTranslator:
|
|
396
481
|
raise _ex.EJobValidation(error)
|
397
482
|
|
398
483
|
@classmethod
|
399
|
-
def _autowire_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
|
484
|
+
def _autowire_flow(cls, flow: _meta.FlowDefinition, job_def: _meta.JobDefinition, job_config: _cfg.JobConfig):
|
400
485
|
|
401
|
-
job =
|
486
|
+
job = job_def.runFlow
|
402
487
|
nodes = copy.copy(flow.nodes)
|
403
488
|
edges: tp.Dict[str, _meta.FlowEdge] = dict()
|
404
489
|
|
@@ -485,7 +570,10 @@ class DevModeTranslator:
|
|
485
570
|
return autowired_flow
|
486
571
|
|
487
572
|
@classmethod
|
488
|
-
def _apply_type_inference(
|
573
|
+
def _apply_type_inference(
|
574
|
+
cls, flow: _meta.FlowDefinition,
|
575
|
+
job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
|
576
|
+
-> _meta.FlowDefinition:
|
489
577
|
|
490
578
|
updated_flow = copy.copy(flow)
|
491
579
|
updated_flow.parameters = copy.copy(flow.parameters)
|
@@ -506,17 +594,17 @@ class DevModeTranslator:
|
|
506
594
|
|
507
595
|
if node.nodeType == _meta.FlowNodeType.PARAMETER_NODE and node_name not in flow.parameters:
|
508
596
|
targets = edges_by_source.get(node_name) or []
|
509
|
-
model_parameter = cls._infer_parameter(node_name, targets, job_config)
|
597
|
+
model_parameter = cls._infer_parameter(node_name, targets, job_def, job_config)
|
510
598
|
updated_flow.parameters[node_name] = model_parameter
|
511
599
|
|
512
600
|
if node.nodeType == _meta.FlowNodeType.INPUT_NODE and node_name not in flow.inputs:
|
513
601
|
targets = edges_by_source.get(node_name) or []
|
514
|
-
model_input = cls._infer_input_schema(node_name, targets, job_config)
|
602
|
+
model_input = cls._infer_input_schema(node_name, targets, job_def, job_config)
|
515
603
|
updated_flow.inputs[node_name] = model_input
|
516
604
|
|
517
605
|
if node.nodeType == _meta.FlowNodeType.OUTPUT_NODE and node_name not in flow.outputs:
|
518
606
|
sources = edges_by_target.get(node_name) or []
|
519
|
-
model_output = cls._infer_output_schema(node_name, sources, job_config)
|
607
|
+
model_output = cls._infer_output_schema(node_name, sources, job_def, job_config)
|
520
608
|
updated_flow.outputs[node_name] = model_output
|
521
609
|
|
522
610
|
return updated_flow
|
@@ -524,13 +612,14 @@ class DevModeTranslator:
|
|
524
612
|
@classmethod
|
525
613
|
def _infer_parameter(
|
526
614
|
cls, param_name: str, targets: tp.List[_meta.FlowSocket],
|
527
|
-
job_config: _cfg.JobConfig)
|
615
|
+
job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
|
616
|
+
-> _meta.ModelParameter:
|
528
617
|
|
529
618
|
model_params = []
|
530
619
|
|
531
620
|
for target in targets:
|
532
621
|
|
533
|
-
model_selector =
|
622
|
+
model_selector = job_def.runFlow.models.get(target.node)
|
534
623
|
model_obj = _util.get_job_resource(model_selector, job_config)
|
535
624
|
model_param = model_obj.model.parameters.get(target.socket)
|
536
625
|
model_params.append(model_param)
|
@@ -560,13 +649,14 @@ class DevModeTranslator:
|
|
560
649
|
@classmethod
|
561
650
|
def _infer_input_schema(
|
562
651
|
cls, input_name: str, targets: tp.List[_meta.FlowSocket],
|
563
|
-
job_config: _cfg.JobConfig)
|
652
|
+
job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
|
653
|
+
-> _meta.ModelInputSchema:
|
564
654
|
|
565
655
|
model_inputs = []
|
566
656
|
|
567
657
|
for target in targets:
|
568
658
|
|
569
|
-
model_selector =
|
659
|
+
model_selector = job_def.runFlow.models.get(target.node)
|
570
660
|
model_obj = _util.get_job_resource(model_selector, job_config)
|
571
661
|
model_input = model_obj.model.inputs.get(target.socket)
|
572
662
|
model_inputs.append(model_input)
|
@@ -594,13 +684,14 @@ class DevModeTranslator:
|
|
594
684
|
@classmethod
|
595
685
|
def _infer_output_schema(
|
596
686
|
cls, output_name: str, sources: tp.List[_meta.FlowSocket],
|
597
|
-
job_config: _cfg.JobConfig)
|
687
|
+
job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
|
688
|
+
-> _meta.ModelOutputSchema:
|
598
689
|
|
599
690
|
model_outputs = []
|
600
691
|
|
601
692
|
for source in sources:
|
602
693
|
|
603
|
-
model_selector =
|
694
|
+
model_selector = job_def.runFlow.models.get(source.node)
|
604
695
|
model_obj = _util.get_job_resource(model_selector, job_config)
|
605
696
|
model_input = model_obj.model.inputs.get(source.socket)
|
606
697
|
model_outputs.append(model_input)
|
@@ -624,11 +715,13 @@ class DevModeTranslator:
|
|
624
715
|
return f"{socket.node}.{socket.socket}" if socket.socket else socket.node
|
625
716
|
|
626
717
|
@classmethod
|
627
|
-
def _process_parameters(
|
718
|
+
def _process_parameters(
|
719
|
+
cls, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
|
720
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
628
721
|
|
629
722
|
# This relies on convention for naming properties across similar job types
|
630
723
|
|
631
|
-
job_detail = cls._get_job_detail(
|
724
|
+
job_detail = cls._get_job_detail(job_def)
|
632
725
|
|
633
726
|
if hasattr(job_detail, "model"):
|
634
727
|
model_key = _util.object_key(job_detail.model)
|
@@ -646,7 +739,7 @@ class DevModeTranslator:
|
|
646
739
|
|
647
740
|
job_detail.parameters = cls._process_parameters_dict(param_specs, raw_values)
|
648
741
|
|
649
|
-
return job_config
|
742
|
+
return job_config, job_def
|
650
743
|
|
651
744
|
@classmethod
|
652
745
|
def _process_parameters_dict(
|
@@ -677,10 +770,11 @@ class DevModeTranslator:
|
|
677
770
|
|
678
771
|
return encoded_values
|
679
772
|
|
680
|
-
|
681
|
-
|
773
|
+
def _process_inputs_and_outputs(
|
774
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
|
775
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
682
776
|
|
683
|
-
job_detail =
|
777
|
+
job_detail = self._get_job_detail(job_def)
|
684
778
|
|
685
779
|
if hasattr(job_detail, "model"):
|
686
780
|
model_obj = _util.get_job_resource(job_detail.model, job_config)
|
@@ -693,7 +787,7 @@ class DevModeTranslator:
|
|
693
787
|
required_outputs = flow_obj.flow.outputs
|
694
788
|
|
695
789
|
else:
|
696
|
-
return job_config
|
790
|
+
return job_config, job_def
|
697
791
|
|
698
792
|
job_inputs = job_detail.inputs
|
699
793
|
job_outputs = job_detail.outputs
|
@@ -705,8 +799,8 @@ class DevModeTranslator:
|
|
705
799
|
model_input = required_inputs[input_key]
|
706
800
|
input_schema = model_input.schema if model_input and not model_input.dynamic else None
|
707
801
|
|
708
|
-
input_id =
|
709
|
-
|
802
|
+
input_id = self._process_input_or_output(
|
803
|
+
input_key, input_value, job_resources,
|
710
804
|
new_unique_file=False, schema=input_schema)
|
711
805
|
|
712
806
|
job_inputs[input_key] = _util.selector_for(input_id)
|
@@ -717,17 +811,16 @@ class DevModeTranslator:
|
|
717
811
|
model_output= required_outputs[output_key]
|
718
812
|
output_schema = model_output.schema if model_output and not model_output.dynamic else None
|
719
813
|
|
720
|
-
output_id =
|
721
|
-
|
814
|
+
output_id = self._process_input_or_output(
|
815
|
+
output_key, output_value, job_resources,
|
722
816
|
new_unique_file=True, schema=output_schema)
|
723
817
|
|
724
818
|
job_outputs[output_key] = _util.selector_for(output_id)
|
725
819
|
|
726
|
-
return job_config
|
820
|
+
return job_config, job_def
|
727
821
|
|
728
|
-
@classmethod
|
729
822
|
def _process_input_or_output(
|
730
|
-
|
823
|
+
self, data_key, data_value,
|
731
824
|
resources: tp.Dict[str, _meta.ObjectDefinition],
|
732
825
|
new_unique_file=False,
|
733
826
|
schema: tp.Optional[_meta.SchemaDefinition] = None) \
|
@@ -738,8 +831,8 @@ class DevModeTranslator:
|
|
738
831
|
|
739
832
|
if isinstance(data_value, str):
|
740
833
|
storage_path = data_value
|
741
|
-
storage_key =
|
742
|
-
storage_format =
|
834
|
+
storage_key = self._sys_config.storage.defaultBucket
|
835
|
+
storage_format = self.infer_format(storage_path, self._sys_config.storage)
|
743
836
|
snap_version = 1
|
744
837
|
|
745
838
|
elif isinstance(data_value, dict):
|
@@ -749,14 +842,14 @@ class DevModeTranslator:
|
|
749
842
|
if not storage_path:
|
750
843
|
raise _ex.EConfigParse(f"Invalid configuration for input [{data_key}] (missing required value 'path'")
|
751
844
|
|
752
|
-
storage_key = data_value.get("storageKey") or
|
753
|
-
storage_format = data_value.get("format") or
|
845
|
+
storage_key = data_value.get("storageKey") or self._sys_config.storage.defaultBucket
|
846
|
+
storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config.storage)
|
754
847
|
snap_version = 1
|
755
848
|
|
756
849
|
else:
|
757
850
|
raise _ex.EConfigParse(f"Invalid configuration for input '{data_key}'")
|
758
851
|
|
759
|
-
|
852
|
+
self._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
|
760
853
|
|
761
854
|
# For unique outputs, increment the snap number to find a new unique snap
|
762
855
|
# These are not incarnations, bc likely in dev mode model code and inputs are changing
|
@@ -764,7 +857,7 @@ class DevModeTranslator:
|
|
764
857
|
|
765
858
|
if new_unique_file:
|
766
859
|
|
767
|
-
x_storage_mgr = _storage.StorageManager(
|
860
|
+
x_storage_mgr = _storage.StorageManager(self._sys_config)
|
768
861
|
x_storage = x_storage_mgr.get_file_storage(storage_key)
|
769
862
|
x_orig_path = pathlib.PurePath(storage_path)
|
770
863
|
x_name = x_orig_path.name
|
@@ -781,9 +874,9 @@ class DevModeTranslator:
|
|
781
874
|
x_name = f"{x_orig_path.stem}-{snap_version}"
|
782
875
|
storage_path = str(x_orig_path.parent.joinpath(x_name))
|
783
876
|
|
784
|
-
|
877
|
+
self._log.info(f"Output for [{data_key}] will be snap version {snap_version}")
|
785
878
|
|
786
|
-
data_obj, storage_obj =
|
879
|
+
data_obj, storage_obj = self._generate_input_definition(
|
787
880
|
data_id, storage_id, storage_key, storage_path, storage_format,
|
788
881
|
snap_index=snap_version, delta_index=1, incarnation_index=1,
|
789
882
|
schema=schema)
|