tracdap-runtime 0.6.5__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/__init__.py +6 -5
- tracdap/rt/_exec/actors.py +6 -5
- tracdap/rt/_exec/context.py +278 -110
- tracdap/rt/_exec/dev_mode.py +237 -143
- tracdap/rt/_exec/engine.py +223 -64
- tracdap/rt/_exec/functions.py +31 -6
- tracdap/rt/_exec/graph.py +15 -5
- tracdap/rt/_exec/graph_builder.py +301 -203
- tracdap/rt/_exec/runtime.py +13 -10
- tracdap/rt/_exec/server.py +6 -5
- tracdap/rt/_impl/__init__.py +6 -5
- tracdap/rt/_impl/config_parser.py +17 -9
- tracdap/rt/_impl/data.py +284 -172
- tracdap/rt/_impl/ext/__init__.py +14 -0
- tracdap/rt/_impl/ext/sql.py +117 -0
- tracdap/rt/_impl/ext/storage.py +58 -0
- tracdap/rt/_impl/grpc/__init__.py +6 -5
- tracdap/rt/_impl/grpc/codec.py +6 -5
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +62 -54
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +37 -2
- tracdap/rt/_impl/guard_rails.py +6 -5
- tracdap/rt/_impl/models.py +6 -5
- tracdap/rt/_impl/repos.py +6 -5
- tracdap/rt/_impl/schemas.py +6 -5
- tracdap/rt/_impl/shim.py +6 -5
- tracdap/rt/_impl/static_api.py +30 -16
- tracdap/rt/_impl/storage.py +8 -7
- tracdap/rt/_impl/type_system.py +6 -5
- tracdap/rt/_impl/util.py +16 -5
- tracdap/rt/_impl/validation.py +72 -18
- tracdap/rt/_plugins/__init__.py +6 -5
- tracdap/rt/_plugins/_helpers.py +6 -5
- tracdap/rt/_plugins/config_local.py +6 -5
- tracdap/rt/_plugins/format_arrow.py +6 -5
- tracdap/rt/_plugins/format_csv.py +6 -5
- tracdap/rt/_plugins/format_parquet.py +6 -5
- tracdap/rt/_plugins/repo_git.py +6 -5
- tracdap/rt/_plugins/repo_local.py +6 -5
- tracdap/rt/_plugins/repo_pypi.py +6 -5
- tracdap/rt/_plugins/storage_aws.py +6 -5
- tracdap/rt/_plugins/storage_azure.py +6 -5
- tracdap/rt/_plugins/storage_gcp.py +6 -5
- tracdap/rt/_plugins/storage_local.py +6 -5
- tracdap/rt/_plugins/storage_sql.py +418 -0
- tracdap/rt/_plugins/storage_sql_dialects.py +118 -0
- tracdap/rt/_version.py +7 -6
- tracdap/rt/api/__init__.py +23 -5
- tracdap/rt/api/experimental.py +85 -37
- tracdap/rt/api/hook.py +16 -5
- tracdap/rt/api/model_api.py +110 -90
- tracdap/rt/api/static_api.py +142 -100
- tracdap/rt/config/common.py +26 -27
- tracdap/rt/config/job.py +5 -6
- tracdap/rt/config/platform.py +41 -42
- tracdap/rt/config/result.py +5 -6
- tracdap/rt/config/runtime.py +6 -7
- tracdap/rt/exceptions.py +13 -7
- tracdap/rt/ext/__init__.py +6 -5
- tracdap/rt/ext/config.py +6 -5
- tracdap/rt/ext/embed.py +6 -5
- tracdap/rt/ext/plugins.py +6 -5
- tracdap/rt/ext/repos.py +6 -5
- tracdap/rt/ext/storage.py +6 -5
- tracdap/rt/launch/__init__.py +10 -5
- tracdap/rt/launch/__main__.py +6 -5
- tracdap/rt/launch/cli.py +6 -5
- tracdap/rt/launch/launch.py +38 -15
- tracdap/rt/metadata/__init__.py +4 -0
- tracdap/rt/metadata/common.py +2 -3
- tracdap/rt/metadata/custom.py +3 -4
- tracdap/rt/metadata/data.py +30 -31
- tracdap/rt/metadata/file.py +6 -7
- tracdap/rt/metadata/flow.py +22 -23
- tracdap/rt/metadata/job.py +89 -45
- tracdap/rt/metadata/model.py +26 -27
- tracdap/rt/metadata/object.py +11 -12
- tracdap/rt/metadata/object_id.py +23 -24
- tracdap/rt/metadata/resource.py +0 -1
- tracdap/rt/metadata/search.py +15 -16
- tracdap/rt/metadata/stoarge.py +22 -23
- tracdap/rt/metadata/tag.py +8 -9
- tracdap/rt/metadata/tag_update.py +11 -12
- tracdap/rt/metadata/type.py +38 -38
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0.dist-info}/LICENSE +1 -1
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0.dist-info}/METADATA +4 -2
- tracdap_runtime-0.7.0.dist-info/RECORD +121 -0
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0.dist-info}/WHEEL +1 -1
- tracdap_runtime-0.6.5.dist-info/RECORD +0 -116
- {tracdap_runtime-0.6.5.dist-info → tracdap_runtime-0.7.0.dist-info}/top_level.txt +0 -0
tracdap/rt/_exec/dev_mode.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
#
|
4
|
-
#
|
5
|
-
#
|
1
|
+
# Licensed to the Fintech Open Source Foundation (FINOS) under one or
|
2
|
+
# more contributor license agreements. See the NOTICE file distributed
|
3
|
+
# with this work for additional information regarding copyright ownership.
|
4
|
+
# FINOS licenses this file to you under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with the
|
6
|
+
# License. You may obtain a copy of the License at
|
6
7
|
#
|
7
8
|
# http://www.apache.org/licenses/LICENSE-2.0
|
8
9
|
#
|
@@ -34,7 +35,15 @@ DEV_MODE_JOB_CONFIG = [
|
|
34
35
|
re.compile(r"job\.\w+\.outputs\.\w+"),
|
35
36
|
re.compile(r"job\.\w+\.models\.\w+"),
|
36
37
|
re.compile(r"job\.\w+\.model"),
|
37
|
-
re.compile(r"job\.\w+\.flow")
|
38
|
+
re.compile(r"job\.\w+\.flow"),
|
39
|
+
|
40
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.parameters\.\w+"),
|
41
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.inputs\.\w+"),
|
42
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.outputs\.\w+"),
|
43
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.models\.\w+"),
|
44
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.model"),
|
45
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.flow")
|
46
|
+
]
|
38
47
|
|
39
48
|
DEV_MODE_SYS_CONFIG = []
|
40
49
|
|
@@ -58,38 +67,6 @@ class DevModeTranslator:
|
|
58
67
|
|
59
68
|
return sys_config
|
60
69
|
|
61
|
-
@classmethod
|
62
|
-
def translate_job_config(
|
63
|
-
cls,
|
64
|
-
sys_config: _cfg.RuntimeConfig,
|
65
|
-
job_config: _cfg.JobConfig,
|
66
|
-
scratch_dir: pathlib.Path,
|
67
|
-
config_mgr: _cfg_p.ConfigManager,
|
68
|
-
model_class: tp.Optional[_api.TracModel.__class__]) \
|
69
|
-
-> _cfg.JobConfig:
|
70
|
-
|
71
|
-
cls._log.info(f"Applying dev mode config translation to job config")
|
72
|
-
|
73
|
-
# Protobuf semantics for a blank jobId should be an object, but objectId will be an empty string
|
74
|
-
if not job_config.jobId or not job_config.jobId.objectId:
|
75
|
-
job_config = cls._process_job_id(job_config)
|
76
|
-
|
77
|
-
if job_config.job.jobType is None or job_config.job.jobType == _meta.JobType.JOB_TYPE_NOT_SET:
|
78
|
-
job_config = cls._process_job_type(job_config)
|
79
|
-
|
80
|
-
# Load and populate any models provided as a Python class or class name
|
81
|
-
job_config = cls._process_models(sys_config, job_config, scratch_dir, model_class)
|
82
|
-
|
83
|
-
# For flows, load external flow definitions then perform auto-wiring and type inference
|
84
|
-
if job_config.job.jobType == _meta.JobType.RUN_FLOW:
|
85
|
-
job_config = cls._process_flow_definition(job_config, config_mgr)
|
86
|
-
|
87
|
-
# Apply processing to the parameters, inputs and outputs
|
88
|
-
job_config = cls._process_parameters(job_config)
|
89
|
-
job_config = cls._process_inputs_and_outputs(sys_config, job_config)
|
90
|
-
|
91
|
-
return job_config
|
92
|
-
|
93
70
|
@classmethod
|
94
71
|
def _add_integrated_repo(cls, sys_config: _cfg.RuntimeConfig) -> _cfg.RuntimeConfig:
|
95
72
|
|
@@ -159,6 +136,86 @@ class DevModeTranslator:
|
|
159
136
|
cls._log.error(msg)
|
160
137
|
raise _ex.EConfigParse(msg)
|
161
138
|
|
139
|
+
|
140
|
+
def __init__(self, sys_config: _cfg.RuntimeConfig, config_mgr: _cfg_p.ConfigManager, scratch_dir: pathlib.Path):
|
141
|
+
self._sys_config = sys_config
|
142
|
+
self._config_mgr = config_mgr
|
143
|
+
self._scratch_dir = scratch_dir
|
144
|
+
self._model_loader: tp.Optional[_models.ModelLoader] = None
|
145
|
+
|
146
|
+
def translate_job_config(
|
147
|
+
self, job_config: _cfg.JobConfig,
|
148
|
+
model_class: tp.Optional[_api.TracModel.__class__] = None) \
|
149
|
+
-> _cfg.JobConfig:
|
150
|
+
|
151
|
+
try:
|
152
|
+
self._log.info(f"Applying dev mode config translation to job config")
|
153
|
+
|
154
|
+
self._model_loader = _models.ModelLoader(self._sys_config, self._scratch_dir)
|
155
|
+
self._model_loader.create_scope("DEV_MODE_TRANSLATION")
|
156
|
+
|
157
|
+
job_config = copy.deepcopy(job_config)
|
158
|
+
job_def = job_config.job
|
159
|
+
|
160
|
+
# Protobuf semantics for a blank jobId should be an object, but objectId will be an empty string
|
161
|
+
if not job_config.jobId or not job_config.jobId.objectId:
|
162
|
+
job_config = self._process_job_id(job_config)
|
163
|
+
|
164
|
+
job_config, job_def = self.translate_job_def(job_config, job_def, model_class)
|
165
|
+
job_config.job = job_def
|
166
|
+
|
167
|
+
return job_config
|
168
|
+
|
169
|
+
finally:
|
170
|
+
self._model_loader.destroy_scope("DEV_MODE_TRANSLATION")
|
171
|
+
self._model_loader = None
|
172
|
+
|
173
|
+
def translate_job_def(
|
174
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition,
|
175
|
+
model_class: tp.Optional[_api.TracModel.__class__] = None) \
|
176
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
177
|
+
|
178
|
+
if job_def.jobType is None or job_def.jobType == _meta.JobType.JOB_TYPE_NOT_SET:
|
179
|
+
job_def = self._process_job_type(job_def)
|
180
|
+
|
181
|
+
# Load and populate any models provided as a Python class or class name
|
182
|
+
job_config, job_def = self._process_models(job_config, job_def, model_class)
|
183
|
+
|
184
|
+
# For flows, load external flow definitions then perform auto-wiring and type inference
|
185
|
+
if job_def.jobType == _meta.JobType.RUN_FLOW:
|
186
|
+
job_config, job_def = self._process_flow_definition(job_config, job_def)
|
187
|
+
|
188
|
+
if job_def.jobType == _meta.JobType.JOB_GROUP:
|
189
|
+
job_config, job_def = self.translate_job_group(job_config, job_def)
|
190
|
+
|
191
|
+
# Apply processing to the parameters, inputs and outputs
|
192
|
+
job_config, job_def = self._process_parameters(job_config, job_def)
|
193
|
+
job_config, job_def = self._process_inputs_and_outputs(job_config, job_def)
|
194
|
+
|
195
|
+
return job_config, job_def
|
196
|
+
|
197
|
+
def translate_job_group(
|
198
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
|
199
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
200
|
+
|
201
|
+
job_group = job_def.jobGroup
|
202
|
+
|
203
|
+
if job_group.jobGroupType is None or job_group.jobGroupType == _meta.JobGroupType.JOB_GROUP_TYPE_NOT_SET:
|
204
|
+
job_group = self._process_job_group_type(job_group)
|
205
|
+
|
206
|
+
group_details = self._get_job_group_detail(job_group)
|
207
|
+
|
208
|
+
if hasattr(group_details, "jobs"):
|
209
|
+
child_jobs = []
|
210
|
+
for child_def in group_details.jobs:
|
211
|
+
job_config, child_def = self.translate_job_def(job_config, child_def)
|
212
|
+
child_jobs.append(child_def)
|
213
|
+
group_details.jobs = child_jobs
|
214
|
+
|
215
|
+
job_def.jobGroup = job_group
|
216
|
+
|
217
|
+
return job_config, job_def
|
218
|
+
|
162
219
|
@classmethod
|
163
220
|
def _add_job_resource(
|
164
221
|
cls, job_config: _cfg.JobConfig,
|
@@ -183,125 +240,153 @@ class DevModeTranslator:
|
|
183
240
|
return translated_config
|
184
241
|
|
185
242
|
@classmethod
|
186
|
-
def _process_job_type(cls,
|
243
|
+
def _process_job_type(cls, job_def: _meta.JobDefinition):
|
187
244
|
|
188
|
-
if
|
245
|
+
if job_def.runModel is not None:
|
189
246
|
job_type = _meta.JobType.RUN_MODEL
|
190
247
|
|
191
|
-
elif
|
248
|
+
elif job_def.runFlow is not None:
|
192
249
|
job_type = _meta.JobType.RUN_FLOW
|
193
250
|
|
194
|
-
elif
|
251
|
+
elif job_def.importModel is not None:
|
195
252
|
job_type = _meta.JobType.IMPORT_MODEL
|
196
253
|
|
197
|
-
elif
|
254
|
+
elif job_def.importData is not None:
|
198
255
|
job_type = _meta.JobType.IMPORT_DATA
|
199
256
|
|
200
|
-
elif
|
257
|
+
elif job_def.exportData is not None:
|
201
258
|
job_type = _meta.JobType.EXPORT_DATA
|
202
259
|
|
260
|
+
elif job_def.jobGroup is not None:
|
261
|
+
job_type = _meta.JobType.JOB_GROUP
|
262
|
+
|
203
263
|
else:
|
204
264
|
cls._log.error("Could not infer job type")
|
205
265
|
raise _ex.EConfigParse("Could not infer job type")
|
206
266
|
|
207
267
|
cls._log.info(f"Inferred job type = [{job_type.name}]")
|
208
268
|
|
209
|
-
job_def = copy.copy(
|
269
|
+
job_def = copy.copy(job_def)
|
210
270
|
job_def.jobType = job_type
|
211
271
|
|
212
|
-
|
213
|
-
job_config.job = job_def
|
272
|
+
return job_def
|
214
273
|
|
215
|
-
|
274
|
+
@classmethod
|
275
|
+
def _process_job_group_type(cls, job_group: _meta.JobGroup) -> _meta.JobGroup:
|
276
|
+
|
277
|
+
if job_group.sequential is not None:
|
278
|
+
job_group_type = _meta.JobGroupType.SEQUENTIAL_JOB_GROUP
|
279
|
+
|
280
|
+
elif job_group.parallel is not None:
|
281
|
+
job_group_type = _meta.JobGroupType.PARALLEL_JOB_GROUP
|
282
|
+
|
283
|
+
else:
|
284
|
+
cls._log.error("Could not infer job group type")
|
285
|
+
raise _ex.EConfigParse("Could not infer job group type")
|
286
|
+
|
287
|
+
cls._log.info(f"Inferred job group type = [{job_group_type.name}]")
|
288
|
+
|
289
|
+
job_group = copy.copy(job_group)
|
290
|
+
job_group.jobGroupType = job_group_type
|
291
|
+
|
292
|
+
return job_group
|
216
293
|
|
217
294
|
@classmethod
|
218
|
-
def _get_job_detail(cls,
|
295
|
+
def _get_job_detail(cls, job_def: _meta.JobDefinition):
|
296
|
+
|
297
|
+
if job_def.jobType == _meta.JobType.RUN_MODEL:
|
298
|
+
return job_def.runModel
|
219
299
|
|
220
|
-
if
|
221
|
-
return
|
300
|
+
if job_def.jobType == _meta.JobType.RUN_FLOW:
|
301
|
+
return job_def.runFlow
|
222
302
|
|
223
|
-
if
|
224
|
-
return
|
303
|
+
if job_def.jobType == _meta.JobType.IMPORT_MODEL:
|
304
|
+
return job_def.importModel
|
225
305
|
|
226
|
-
if
|
227
|
-
return
|
306
|
+
if job_def.jobType == _meta.JobType.IMPORT_DATA:
|
307
|
+
return job_def.importData
|
228
308
|
|
229
|
-
if
|
230
|
-
return
|
309
|
+
if job_def.jobType == _meta.JobType.EXPORT_DATA:
|
310
|
+
return job_def.exportData
|
231
311
|
|
232
|
-
if
|
233
|
-
return
|
312
|
+
if job_def.jobType == _meta.JobType.JOB_GROUP:
|
313
|
+
return job_def.jobGroup
|
234
314
|
|
235
|
-
raise _ex.EConfigParse(f"Could not get job details for job type [{
|
315
|
+
raise _ex.EConfigParse(f"Could not get job details for job type [{job_def.jobType}]")
|
236
316
|
|
237
317
|
@classmethod
|
318
|
+
def _get_job_group_detail(cls, job_group: _meta.JobGroup):
|
319
|
+
|
320
|
+
if job_group.jobGroupType == _meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
|
321
|
+
return job_group.sequential
|
322
|
+
|
323
|
+
if job_group.jobGroupType == _meta.JobGroupType.PARALLEL_JOB_GROUP:
|
324
|
+
return job_group.parallel
|
325
|
+
|
326
|
+
raise _ex.EConfigParse(f"Could not get job group details for group type [{job_group.jobGroupType}]")
|
327
|
+
|
238
328
|
def _process_models(
|
239
|
-
|
240
|
-
sys_config: _cfg.RuntimeConfig,
|
241
|
-
job_config: _cfg.JobConfig,
|
242
|
-
scratch_dir: pathlib.Path,
|
329
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition,
|
243
330
|
model_class: tp.Optional[_api.TracModel.__class__]) \
|
244
|
-
-> _cfg.JobConfig:
|
245
|
-
|
246
|
-
model_loader = _models.ModelLoader(sys_config, scratch_dir)
|
247
|
-
model_loader.create_scope("DEV_MODE_TRANSLATION")
|
331
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
248
332
|
|
249
333
|
# This processing works on the assumption that job details follow a convention for addressing models
|
250
334
|
# Jobs requiring a single model have a field called "model"
|
251
335
|
# Jobs requiring multiple models have a field called "models", which is a dict
|
252
336
|
|
253
|
-
job_detail =
|
337
|
+
job_detail = self._get_job_detail(job_def)
|
254
338
|
|
255
339
|
# If a model class is supplied in code, use that to generate the model def
|
256
340
|
if model_class is not None:
|
257
341
|
|
258
342
|
# Passing a model class via launch_model() is only supported for job types with a single model
|
259
343
|
if not hasattr(job_detail, "model"):
|
260
|
-
raise _ex.EJobValidation(f"Job type [{
|
344
|
+
raise _ex.EJobValidation(f"Job type [{job_def.jobType}] cannot be launched using launch_model()")
|
261
345
|
|
262
|
-
model_id, model_obj =
|
346
|
+
model_id, model_obj = self._generate_model_for_class(model_class)
|
263
347
|
job_detail.model = _util.selector_for(model_id)
|
264
|
-
job_config =
|
348
|
+
job_config = self._add_job_resource(job_config, model_id, model_obj)
|
265
349
|
|
266
350
|
# Otherwise look for models specified as a single string, and take that as the entry point
|
267
351
|
else:
|
268
352
|
|
269
353
|
# Jobs with a single model
|
270
354
|
if hasattr(job_detail, "model") and isinstance(job_detail.model, str):
|
271
|
-
model_id, model_obj =
|
355
|
+
model_id, model_obj = self._generate_model_for_entry_point(job_detail.model) # noqa
|
272
356
|
job_detail.model = _util.selector_for(model_id)
|
273
|
-
job_config =
|
357
|
+
job_config = self._add_job_resource(job_config, model_id, model_obj)
|
274
358
|
|
275
|
-
|
359
|
+
elif hasattr(job_detail, "model") and isinstance(job_detail.model, _meta.TagSelector):
|
360
|
+
if job_detail.model.objectType == _meta.ObjectType.OBJECT_TYPE_NOT_SET:
|
361
|
+
error = f"Missing required property [model] for job type [{job_def.jobType.name}]"
|
362
|
+
self._log.error(error)
|
363
|
+
raise _ex.EJobValidation(error)
|
364
|
+
|
365
|
+
# Jobs with multiple models
|
276
366
|
elif hasattr(job_detail, "models") and isinstance(job_detail.models, dict):
|
277
367
|
for model_key, model_detail in job_detail.models.items():
|
278
368
|
if isinstance(model_detail, str):
|
279
|
-
model_id, model_obj =
|
369
|
+
model_id, model_obj = self._generate_model_for_entry_point(model_detail)
|
280
370
|
job_detail.models[model_key] = _util.selector_for(model_id)
|
281
|
-
job_config =
|
282
|
-
|
283
|
-
model_loader.destroy_scope("DEV_MODE_TRANSLATION")
|
371
|
+
job_config = self._add_job_resource(job_config, model_id, model_obj)
|
284
372
|
|
285
|
-
return job_config
|
373
|
+
return job_config, job_def
|
286
374
|
|
287
|
-
@classmethod
|
288
375
|
def _generate_model_for_class(
|
289
|
-
|
376
|
+
self, model_class: _api.TracModel.__class__) \
|
290
377
|
-> (_meta.TagHeader, _meta.ObjectDefinition):
|
291
378
|
|
292
379
|
model_entry_point = f"{model_class.__module__}.{model_class.__name__}"
|
380
|
+
return self._generate_model_for_entry_point(model_entry_point)
|
293
381
|
|
294
|
-
return cls._generate_model_for_entry_point(model_loader, model_entry_point)
|
295
|
-
|
296
|
-
@classmethod
|
297
382
|
def _generate_model_for_entry_point(
|
298
|
-
|
383
|
+
self, model_entry_point: str) \
|
299
384
|
-> (_meta.TagHeader, _meta.ObjectDefinition):
|
300
385
|
|
301
386
|
model_id = _util.new_object_id(_meta.ObjectType.MODEL)
|
302
387
|
model_key = _util.object_key(model_id)
|
303
388
|
|
304
|
-
|
389
|
+
self._log.info(f"Generating model definition for [{model_entry_point}] with ID = [{model_key}]")
|
305
390
|
|
306
391
|
skeleton_modeL_def = _meta.ModelDefinition( # noqa
|
307
392
|
language="python",
|
@@ -312,8 +397,8 @@ class DevModeTranslator:
|
|
312
397
|
inputs={},
|
313
398
|
outputs={})
|
314
399
|
|
315
|
-
model_class =
|
316
|
-
model_def =
|
400
|
+
model_class = self._model_loader.load_model_class("DEV_MODE_TRANSLATION", skeleton_modeL_def)
|
401
|
+
model_def = self._model_loader.scan_model(skeleton_modeL_def, model_class)
|
317
402
|
|
318
403
|
model_object = _meta.ObjectDefinition(
|
319
404
|
objectType=_meta.ObjectType.MODEL,
|
@@ -321,56 +406,57 @@ class DevModeTranslator:
|
|
321
406
|
|
322
407
|
return model_id, model_object
|
323
408
|
|
324
|
-
|
325
|
-
|
409
|
+
def _process_flow_definition(
|
410
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
|
411
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
326
412
|
|
327
|
-
flow_details =
|
413
|
+
flow_details = job_def.runFlow.flow
|
328
414
|
|
329
415
|
# Do not apply translation if flow is specified as an object ID / selector (assume full config is supplied)
|
330
416
|
if isinstance(flow_details, _meta.TagHeader) or isinstance(flow_details, _meta.TagSelector):
|
331
|
-
return job_config
|
417
|
+
return job_config, job_def
|
332
418
|
|
333
419
|
# Otherwise, flow is specified as the path to dev-mode flow definition
|
334
420
|
if not isinstance(flow_details, str):
|
335
421
|
err = f"Invalid config value for [job.runFlow.flow]: Expected path or tag selector, got [{flow_details}])"
|
336
|
-
|
422
|
+
self._log.error(err)
|
337
423
|
raise _ex.EConfigParse(err)
|
338
424
|
|
339
425
|
flow_id = _util.new_object_id(_meta.ObjectType.FLOW)
|
340
426
|
flow_key = _util.object_key(flow_id)
|
341
427
|
|
342
|
-
|
428
|
+
self._log.info(f"Generating flow definition from [{flow_details}] with ID = [{flow_key}]")
|
343
429
|
|
344
|
-
flow_def =
|
430
|
+
flow_def = self._config_mgr.load_config_object(flow_details, _meta.FlowDefinition)
|
345
431
|
|
346
432
|
# Validate models against the flow (this could move to _impl.validation and check prod jobs as well)
|
347
|
-
|
433
|
+
self._check_models_for_flow(flow_def, job_def, job_config)
|
348
434
|
|
349
435
|
# Auto-wiring and inference only applied to externally loaded flows for now
|
350
|
-
flow_def =
|
351
|
-
flow_def =
|
436
|
+
flow_def = self._autowire_flow(flow_def, job_def, job_config)
|
437
|
+
flow_def = self._apply_type_inference(flow_def, job_def, job_config)
|
352
438
|
|
353
439
|
flow_obj = _meta.ObjectDefinition(
|
354
440
|
objectType=_meta.ObjectType.FLOW,
|
355
441
|
flow=flow_def)
|
356
442
|
|
443
|
+
job_def = copy.copy(job_def)
|
444
|
+
job_def.runFlow = copy.copy(job_def.runFlow)
|
445
|
+
job_def.runFlow.flow = _util.selector_for(flow_id)
|
446
|
+
|
357
447
|
job_config = copy.copy(job_config)
|
358
|
-
job_config.job = copy.copy(job_config.job)
|
359
|
-
job_config.job.runFlow = copy.copy(job_config.job.runFlow)
|
360
448
|
job_config.resources = copy.copy(job_config.resources)
|
449
|
+
job_config = self._add_job_resource(job_config, flow_id, flow_obj)
|
361
450
|
|
362
|
-
|
363
|
-
job_config.job.runFlow.flow = _util.selector_for(flow_id)
|
364
|
-
|
365
|
-
return job_config
|
451
|
+
return job_config, job_def
|
366
452
|
|
367
453
|
@classmethod
|
368
|
-
def _check_models_for_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
|
454
|
+
def _check_models_for_flow(cls, flow: _meta.FlowDefinition, job_def: _meta.JobDefinition, job_config: _cfg.JobConfig):
|
369
455
|
|
370
456
|
model_nodes = dict(filter(lambda n: n[1].nodeType == _meta.FlowNodeType.MODEL_NODE, flow.nodes.items()))
|
371
457
|
|
372
|
-
missing_models = list(filter(lambda m: m not in
|
373
|
-
extra_models = list(filter(lambda m: m not in model_nodes,
|
458
|
+
missing_models = list(filter(lambda m: m not in job_def.runFlow.models, model_nodes.keys()))
|
459
|
+
extra_models = list(filter(lambda m: m not in model_nodes, job_def.runFlow.models.keys()))
|
374
460
|
|
375
461
|
if any(missing_models):
|
376
462
|
error = f"Missing models in job definition: {', '.join(missing_models)}"
|
@@ -384,7 +470,7 @@ class DevModeTranslator:
|
|
384
470
|
|
385
471
|
for model_name, model_node in model_nodes.items():
|
386
472
|
|
387
|
-
model_selector =
|
473
|
+
model_selector = job_def.runFlow.models[model_name]
|
388
474
|
model_obj = _util.get_job_resource(model_selector, job_config)
|
389
475
|
|
390
476
|
model_inputs = set(model_obj.model.inputs.keys())
|
@@ -396,9 +482,9 @@ class DevModeTranslator:
|
|
396
482
|
raise _ex.EJobValidation(error)
|
397
483
|
|
398
484
|
@classmethod
|
399
|
-
def _autowire_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
|
485
|
+
def _autowire_flow(cls, flow: _meta.FlowDefinition, job_def: _meta.JobDefinition, job_config: _cfg.JobConfig):
|
400
486
|
|
401
|
-
job =
|
487
|
+
job = job_def.runFlow
|
402
488
|
nodes = copy.copy(flow.nodes)
|
403
489
|
edges: tp.Dict[str, _meta.FlowEdge] = dict()
|
404
490
|
|
@@ -485,7 +571,10 @@ class DevModeTranslator:
|
|
485
571
|
return autowired_flow
|
486
572
|
|
487
573
|
@classmethod
|
488
|
-
def _apply_type_inference(
|
574
|
+
def _apply_type_inference(
|
575
|
+
cls, flow: _meta.FlowDefinition,
|
576
|
+
job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
|
577
|
+
-> _meta.FlowDefinition:
|
489
578
|
|
490
579
|
updated_flow = copy.copy(flow)
|
491
580
|
updated_flow.parameters = copy.copy(flow.parameters)
|
@@ -506,17 +595,17 @@ class DevModeTranslator:
|
|
506
595
|
|
507
596
|
if node.nodeType == _meta.FlowNodeType.PARAMETER_NODE and node_name not in flow.parameters:
|
508
597
|
targets = edges_by_source.get(node_name) or []
|
509
|
-
model_parameter = cls._infer_parameter(node_name, targets, job_config)
|
598
|
+
model_parameter = cls._infer_parameter(node_name, targets, job_def, job_config)
|
510
599
|
updated_flow.parameters[node_name] = model_parameter
|
511
600
|
|
512
601
|
if node.nodeType == _meta.FlowNodeType.INPUT_NODE and node_name not in flow.inputs:
|
513
602
|
targets = edges_by_source.get(node_name) or []
|
514
|
-
model_input = cls._infer_input_schema(node_name, targets, job_config)
|
603
|
+
model_input = cls._infer_input_schema(node_name, targets, job_def, job_config)
|
515
604
|
updated_flow.inputs[node_name] = model_input
|
516
605
|
|
517
606
|
if node.nodeType == _meta.FlowNodeType.OUTPUT_NODE and node_name not in flow.outputs:
|
518
607
|
sources = edges_by_target.get(node_name) or []
|
519
|
-
model_output = cls._infer_output_schema(node_name, sources, job_config)
|
608
|
+
model_output = cls._infer_output_schema(node_name, sources, job_def, job_config)
|
520
609
|
updated_flow.outputs[node_name] = model_output
|
521
610
|
|
522
611
|
return updated_flow
|
@@ -524,13 +613,14 @@ class DevModeTranslator:
|
|
524
613
|
@classmethod
|
525
614
|
def _infer_parameter(
|
526
615
|
cls, param_name: str, targets: tp.List[_meta.FlowSocket],
|
527
|
-
job_config: _cfg.JobConfig)
|
616
|
+
job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
|
617
|
+
-> _meta.ModelParameter:
|
528
618
|
|
529
619
|
model_params = []
|
530
620
|
|
531
621
|
for target in targets:
|
532
622
|
|
533
|
-
model_selector =
|
623
|
+
model_selector = job_def.runFlow.models.get(target.node)
|
534
624
|
model_obj = _util.get_job_resource(model_selector, job_config)
|
535
625
|
model_param = model_obj.model.parameters.get(target.socket)
|
536
626
|
model_params.append(model_param)
|
@@ -560,13 +650,14 @@ class DevModeTranslator:
|
|
560
650
|
@classmethod
|
561
651
|
def _infer_input_schema(
|
562
652
|
cls, input_name: str, targets: tp.List[_meta.FlowSocket],
|
563
|
-
job_config: _cfg.JobConfig)
|
653
|
+
job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
|
654
|
+
-> _meta.ModelInputSchema:
|
564
655
|
|
565
656
|
model_inputs = []
|
566
657
|
|
567
658
|
for target in targets:
|
568
659
|
|
569
|
-
model_selector =
|
660
|
+
model_selector = job_def.runFlow.models.get(target.node)
|
570
661
|
model_obj = _util.get_job_resource(model_selector, job_config)
|
571
662
|
model_input = model_obj.model.inputs.get(target.socket)
|
572
663
|
model_inputs.append(model_input)
|
@@ -594,13 +685,14 @@ class DevModeTranslator:
|
|
594
685
|
@classmethod
|
595
686
|
def _infer_output_schema(
|
596
687
|
cls, output_name: str, sources: tp.List[_meta.FlowSocket],
|
597
|
-
job_config: _cfg.JobConfig)
|
688
|
+
job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
|
689
|
+
-> _meta.ModelOutputSchema:
|
598
690
|
|
599
691
|
model_outputs = []
|
600
692
|
|
601
693
|
for source in sources:
|
602
694
|
|
603
|
-
model_selector =
|
695
|
+
model_selector = job_def.runFlow.models.get(source.node)
|
604
696
|
model_obj = _util.get_job_resource(model_selector, job_config)
|
605
697
|
model_input = model_obj.model.inputs.get(source.socket)
|
606
698
|
model_outputs.append(model_input)
|
@@ -624,11 +716,13 @@ class DevModeTranslator:
|
|
624
716
|
return f"{socket.node}.{socket.socket}" if socket.socket else socket.node
|
625
717
|
|
626
718
|
@classmethod
|
627
|
-
def _process_parameters(
|
719
|
+
def _process_parameters(
|
720
|
+
cls, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
|
721
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
628
722
|
|
629
723
|
# This relies on convention for naming properties across similar job types
|
630
724
|
|
631
|
-
job_detail = cls._get_job_detail(
|
725
|
+
job_detail = cls._get_job_detail(job_def)
|
632
726
|
|
633
727
|
if hasattr(job_detail, "model"):
|
634
728
|
model_key = _util.object_key(job_detail.model)
|
@@ -646,7 +740,7 @@ class DevModeTranslator:
|
|
646
740
|
|
647
741
|
job_detail.parameters = cls._process_parameters_dict(param_specs, raw_values)
|
648
742
|
|
649
|
-
return job_config
|
743
|
+
return job_config, job_def
|
650
744
|
|
651
745
|
@classmethod
|
652
746
|
def _process_parameters_dict(
|
@@ -677,10 +771,11 @@ class DevModeTranslator:
|
|
677
771
|
|
678
772
|
return encoded_values
|
679
773
|
|
680
|
-
|
681
|
-
|
774
|
+
def _process_inputs_and_outputs(
|
775
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
|
776
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
682
777
|
|
683
|
-
job_detail =
|
778
|
+
job_detail = self._get_job_detail(job_def)
|
684
779
|
|
685
780
|
if hasattr(job_detail, "model"):
|
686
781
|
model_obj = _util.get_job_resource(job_detail.model, job_config)
|
@@ -693,7 +788,7 @@ class DevModeTranslator:
|
|
693
788
|
required_outputs = flow_obj.flow.outputs
|
694
789
|
|
695
790
|
else:
|
696
|
-
return job_config
|
791
|
+
return job_config, job_def
|
697
792
|
|
698
793
|
job_inputs = job_detail.inputs
|
699
794
|
job_outputs = job_detail.outputs
|
@@ -705,8 +800,8 @@ class DevModeTranslator:
|
|
705
800
|
model_input = required_inputs[input_key]
|
706
801
|
input_schema = model_input.schema if model_input and not model_input.dynamic else None
|
707
802
|
|
708
|
-
input_id =
|
709
|
-
|
803
|
+
input_id = self._process_input_or_output(
|
804
|
+
input_key, input_value, job_resources,
|
710
805
|
new_unique_file=False, schema=input_schema)
|
711
806
|
|
712
807
|
job_inputs[input_key] = _util.selector_for(input_id)
|
@@ -717,17 +812,16 @@ class DevModeTranslator:
|
|
717
812
|
model_output= required_outputs[output_key]
|
718
813
|
output_schema = model_output.schema if model_output and not model_output.dynamic else None
|
719
814
|
|
720
|
-
output_id =
|
721
|
-
|
815
|
+
output_id = self._process_input_or_output(
|
816
|
+
output_key, output_value, job_resources,
|
722
817
|
new_unique_file=True, schema=output_schema)
|
723
818
|
|
724
819
|
job_outputs[output_key] = _util.selector_for(output_id)
|
725
820
|
|
726
|
-
return job_config
|
821
|
+
return job_config, job_def
|
727
822
|
|
728
|
-
@classmethod
|
729
823
|
def _process_input_or_output(
|
730
|
-
|
824
|
+
self, data_key, data_value,
|
731
825
|
resources: tp.Dict[str, _meta.ObjectDefinition],
|
732
826
|
new_unique_file=False,
|
733
827
|
schema: tp.Optional[_meta.SchemaDefinition] = None) \
|
@@ -738,8 +832,8 @@ class DevModeTranslator:
|
|
738
832
|
|
739
833
|
if isinstance(data_value, str):
|
740
834
|
storage_path = data_value
|
741
|
-
storage_key =
|
742
|
-
storage_format =
|
835
|
+
storage_key = self._sys_config.storage.defaultBucket
|
836
|
+
storage_format = self.infer_format(storage_path, self._sys_config.storage)
|
743
837
|
snap_version = 1
|
744
838
|
|
745
839
|
elif isinstance(data_value, dict):
|
@@ -749,14 +843,14 @@ class DevModeTranslator:
|
|
749
843
|
if not storage_path:
|
750
844
|
raise _ex.EConfigParse(f"Invalid configuration for input [{data_key}] (missing required value 'path'")
|
751
845
|
|
752
|
-
storage_key = data_value.get("storageKey") or
|
753
|
-
storage_format = data_value.get("format") or
|
846
|
+
storage_key = data_value.get("storageKey") or self._sys_config.storage.defaultBucket
|
847
|
+
storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config.storage)
|
754
848
|
snap_version = 1
|
755
849
|
|
756
850
|
else:
|
757
851
|
raise _ex.EConfigParse(f"Invalid configuration for input '{data_key}'")
|
758
852
|
|
759
|
-
|
853
|
+
self._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
|
760
854
|
|
761
855
|
# For unique outputs, increment the snap number to find a new unique snap
|
762
856
|
# These are not incarnations, bc likely in dev mode model code and inputs are changing
|
@@ -764,7 +858,7 @@ class DevModeTranslator:
|
|
764
858
|
|
765
859
|
if new_unique_file:
|
766
860
|
|
767
|
-
x_storage_mgr = _storage.StorageManager(
|
861
|
+
x_storage_mgr = _storage.StorageManager(self._sys_config)
|
768
862
|
x_storage = x_storage_mgr.get_file_storage(storage_key)
|
769
863
|
x_orig_path = pathlib.PurePath(storage_path)
|
770
864
|
x_name = x_orig_path.name
|
@@ -781,9 +875,9 @@ class DevModeTranslator:
|
|
781
875
|
x_name = f"{x_orig_path.stem}-{snap_version}"
|
782
876
|
storage_path = str(x_orig_path.parent.joinpath(x_name))
|
783
877
|
|
784
|
-
|
878
|
+
self._log.info(f"Output for [{data_key}] will be snap version {snap_version}")
|
785
879
|
|
786
|
-
data_obj, storage_obj =
|
880
|
+
data_obj, storage_obj = self._generate_input_definition(
|
787
881
|
data_id, storage_id, storage_key, storage_path, storage_format,
|
788
882
|
snap_index=snap_version, delta_index=1, incarnation_index=1,
|
789
883
|
schema=schema)
|