tracdap-runtime 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_exec/context.py +556 -36
- tracdap/rt/_exec/dev_mode.py +320 -198
- tracdap/rt/_exec/engine.py +331 -62
- tracdap/rt/_exec/functions.py +151 -22
- tracdap/rt/_exec/graph.py +47 -13
- tracdap/rt/_exec/graph_builder.py +383 -175
- tracdap/rt/_exec/runtime.py +7 -5
- tracdap/rt/_impl/config_parser.py +11 -4
- tracdap/rt/_impl/data.py +329 -152
- tracdap/rt/_impl/ext/__init__.py +13 -0
- tracdap/rt/_impl/ext/sql.py +116 -0
- tracdap/rt/_impl/ext/storage.py +57 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +82 -30
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +155 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +12 -10
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +29 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +16 -0
- tracdap/rt/_impl/models.py +8 -0
- tracdap/rt/_impl/static_api.py +29 -0
- tracdap/rt/_impl/storage.py +39 -27
- tracdap/rt/_impl/util.py +10 -0
- tracdap/rt/_impl/validation.py +140 -18
- tracdap/rt/_plugins/repo_git.py +1 -1
- tracdap/rt/_plugins/storage_sql.py +417 -0
- tracdap/rt/_plugins/storage_sql_dialects.py +117 -0
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/experimental.py +267 -0
- tracdap/rt/api/hook.py +14 -0
- tracdap/rt/api/model_api.py +48 -6
- tracdap/rt/config/__init__.py +2 -2
- tracdap/rt/config/common.py +6 -0
- tracdap/rt/metadata/__init__.py +29 -20
- tracdap/rt/metadata/job.py +99 -0
- tracdap/rt/metadata/model.py +18 -0
- tracdap/rt/metadata/resource.py +24 -0
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/METADATA +5 -1
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/RECORD +41 -32
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.6.dist-info}/top_level.txt +0 -0
tracdap/rt/_exec/dev_mode.py
CHANGED
@@ -12,8 +12,6 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
|
-
from __future__ import annotations
|
16
|
-
|
17
15
|
import re
|
18
16
|
import typing as tp
|
19
17
|
import copy
|
@@ -31,12 +29,20 @@ import tracdap.rt._impl.util as _util # noqa
|
|
31
29
|
|
32
30
|
|
33
31
|
DEV_MODE_JOB_CONFIG = [
|
34
|
-
re.compile(r"job
|
35
|
-
re.compile(r"job
|
36
|
-
re.compile(r"job
|
37
|
-
re.compile(r"job
|
38
|
-
re.compile(r"job
|
39
|
-
re.compile(r"job
|
32
|
+
re.compile(r"job\.\w+\.parameters\.\w+"),
|
33
|
+
re.compile(r"job\.\w+\.inputs\.\w+"),
|
34
|
+
re.compile(r"job\.\w+\.outputs\.\w+"),
|
35
|
+
re.compile(r"job\.\w+\.models\.\w+"),
|
36
|
+
re.compile(r"job\.\w+\.model"),
|
37
|
+
re.compile(r"job\.\w+\.flow"),
|
38
|
+
|
39
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.parameters\.\w+"),
|
40
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.inputs\.\w+"),
|
41
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.outputs\.\w+"),
|
42
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.models\.\w+"),
|
43
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.model"),
|
44
|
+
re.compile(r".*\.jobs\.\d+\.\w+\.flow")
|
45
|
+
]
|
40
46
|
|
41
47
|
DEV_MODE_SYS_CONFIG = []
|
42
48
|
|
@@ -56,43 +62,10 @@ class DevModeTranslator:
|
|
56
62
|
sys_config.storage = _cfg.StorageConfig()
|
57
63
|
|
58
64
|
sys_config = cls._add_integrated_repo(sys_config)
|
59
|
-
sys_config = cls.
|
65
|
+
sys_config = cls._process_storage(sys_config, config_mgr)
|
60
66
|
|
61
67
|
return sys_config
|
62
68
|
|
63
|
-
@classmethod
|
64
|
-
def translate_job_config(
|
65
|
-
cls,
|
66
|
-
sys_config: _cfg.RuntimeConfig,
|
67
|
-
job_config: _cfg.JobConfig,
|
68
|
-
scratch_dir: pathlib.Path,
|
69
|
-
config_mgr: _cfg_p.ConfigManager,
|
70
|
-
model_class: tp.Optional[_api.TracModel.__class__]) \
|
71
|
-
-> _cfg.JobConfig:
|
72
|
-
|
73
|
-
cls._log.info(f"Applying dev mode config translation to job config")
|
74
|
-
|
75
|
-
if not job_config.jobId:
|
76
|
-
job_config = cls._process_job_id(job_config)
|
77
|
-
|
78
|
-
if job_config.job.jobType is None or job_config.job.jobType == _meta.JobType.JOB_TYPE_NOT_SET:
|
79
|
-
job_config = cls._process_job_type(job_config)
|
80
|
-
|
81
|
-
# Load and populate any models provided as a Python class or class name
|
82
|
-
if job_config.job.jobType in [_meta.JobType.RUN_MODEL, _meta.JobType.RUN_FLOW]:
|
83
|
-
job_config = cls._process_models(sys_config, job_config, scratch_dir, model_class)
|
84
|
-
|
85
|
-
# For flows, load external flow definitions then perform auto-wiring and type inference
|
86
|
-
if job_config.job.jobType == _meta.JobType.RUN_FLOW:
|
87
|
-
job_config = cls._process_flow_definition(job_config, config_mgr)
|
88
|
-
|
89
|
-
# For run (model|flow) jobs, apply processing to the parameters, inputs and outputs
|
90
|
-
if job_config.job.jobType in [_meta.JobType.RUN_MODEL, _meta.JobType.RUN_FLOW]:
|
91
|
-
job_config = cls._process_parameters(job_config)
|
92
|
-
job_config = cls._process_inputs_and_outputs(sys_config, job_config)
|
93
|
-
|
94
|
-
return job_config
|
95
|
-
|
96
69
|
@classmethod
|
97
70
|
def _add_integrated_repo(cls, sys_config: _cfg.RuntimeConfig) -> _cfg.RuntimeConfig:
|
98
71
|
|
@@ -107,51 +80,140 @@ class DevModeTranslator:
|
|
107
80
|
return sys_config
|
108
81
|
|
109
82
|
@classmethod
|
110
|
-
def
|
83
|
+
def _process_storage(
|
111
84
|
cls, sys_config: _cfg.RuntimeConfig,
|
112
85
|
config_mgr: _cfg_p.ConfigManager):
|
113
86
|
|
114
87
|
storage_config = copy.deepcopy(sys_config.storage)
|
115
88
|
|
116
89
|
for bucket_key, bucket_config in storage_config.buckets.items():
|
90
|
+
storage_config.buckets[bucket_key] = cls._resolve_storage_location(
|
91
|
+
bucket_key, bucket_config, config_mgr)
|
117
92
|
|
118
|
-
|
119
|
-
|
93
|
+
for bucket_key, bucket_config in storage_config.external.items():
|
94
|
+
storage_config.external[bucket_key] = cls._resolve_storage_location(
|
95
|
+
bucket_key, bucket_config, config_mgr)
|
120
96
|
|
121
|
-
|
122
|
-
|
97
|
+
sys_config = copy.copy(sys_config)
|
98
|
+
sys_config.storage = storage_config
|
123
99
|
|
124
|
-
|
100
|
+
return sys_config
|
101
|
+
|
102
|
+
@classmethod
|
103
|
+
def _resolve_storage_location(cls, bucket_key, bucket_config, config_mgr: _cfg_p.ConfigManager):
|
125
104
|
|
126
|
-
|
127
|
-
|
105
|
+
if bucket_config.protocol != "LOCAL":
|
106
|
+
return bucket_config
|
128
107
|
|
129
|
-
|
108
|
+
if "rootPath" not in bucket_config.properties:
|
109
|
+
return bucket_config
|
130
110
|
|
131
|
-
|
132
|
-
if sys_config_path is not None:
|
133
|
-
absolute_path = sys_config_path.joinpath(root_path).resolve()
|
134
|
-
if absolute_path.exists():
|
135
|
-
cls._log.info(f"Resolved [{root_path}] -> [{absolute_path}]")
|
136
|
-
bucket_config.properties["rootPath"] = str(absolute_path)
|
137
|
-
continue
|
111
|
+
root_path = pathlib.Path(bucket_config.properties["rootPath"])
|
138
112
|
|
139
|
-
|
140
|
-
|
113
|
+
if root_path.is_absolute():
|
114
|
+
return bucket_config
|
141
115
|
|
116
|
+
cls._log.info(f"Resolving relative path for [{bucket_key}] local storage...")
|
117
|
+
|
118
|
+
sys_config_path = config_mgr.config_dir_path()
|
119
|
+
if sys_config_path is not None:
|
120
|
+
absolute_path = sys_config_path.joinpath(root_path).resolve()
|
142
121
|
if absolute_path.exists():
|
143
122
|
cls._log.info(f"Resolved [{root_path}] -> [{absolute_path}]")
|
144
123
|
bucket_config.properties["rootPath"] = str(absolute_path)
|
145
|
-
|
124
|
+
return bucket_config
|
146
125
|
|
147
|
-
|
148
|
-
|
149
|
-
raise _ex.EConfigParse(msg)
|
126
|
+
cwd = pathlib.Path.cwd()
|
127
|
+
absolute_path = cwd.joinpath(root_path).resolve()
|
150
128
|
|
151
|
-
|
152
|
-
|
129
|
+
if absolute_path.exists():
|
130
|
+
cls._log.info(f"Resolved [{root_path}] -> [{absolute_path}]")
|
131
|
+
bucket_config.properties["rootPath"] = str(absolute_path)
|
132
|
+
return bucket_config
|
153
133
|
|
154
|
-
|
134
|
+
msg = f"Failed to resolve relative storage path [{root_path}]"
|
135
|
+
cls._log.error(msg)
|
136
|
+
raise _ex.EConfigParse(msg)
|
137
|
+
|
138
|
+
|
139
|
+
def __init__(self, sys_config: _cfg.RuntimeConfig, config_mgr: _cfg_p.ConfigManager, scratch_dir: pathlib.Path):
|
140
|
+
self._sys_config = sys_config
|
141
|
+
self._config_mgr = config_mgr
|
142
|
+
self._scratch_dir = scratch_dir
|
143
|
+
self._model_loader: tp.Optional[_models.ModelLoader] = None
|
144
|
+
|
145
|
+
def translate_job_config(
|
146
|
+
self, job_config: _cfg.JobConfig,
|
147
|
+
model_class: tp.Optional[_api.TracModel.__class__] = None) \
|
148
|
+
-> _cfg.JobConfig:
|
149
|
+
|
150
|
+
try:
|
151
|
+
self._log.info(f"Applying dev mode config translation to job config")
|
152
|
+
|
153
|
+
self._model_loader = _models.ModelLoader(self._sys_config, self._scratch_dir)
|
154
|
+
self._model_loader.create_scope("DEV_MODE_TRANSLATION")
|
155
|
+
|
156
|
+
job_config = copy.deepcopy(job_config)
|
157
|
+
job_def = job_config.job
|
158
|
+
|
159
|
+
# Protobuf semantics for a blank jobId should be an object, but objectId will be an empty string
|
160
|
+
if not job_config.jobId or not job_config.jobId.objectId:
|
161
|
+
job_config = self._process_job_id(job_config)
|
162
|
+
|
163
|
+
job_config, job_def = self.translate_job_def(job_config, job_def, model_class)
|
164
|
+
job_config.job = job_def
|
165
|
+
|
166
|
+
return job_config
|
167
|
+
|
168
|
+
finally:
|
169
|
+
self._model_loader.destroy_scope("DEV_MODE_TRANSLATION")
|
170
|
+
self._model_loader = None
|
171
|
+
|
172
|
+
def translate_job_def(
|
173
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition,
|
174
|
+
model_class: tp.Optional[_api.TracModel.__class__] = None) \
|
175
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
176
|
+
|
177
|
+
if job_def.jobType is None or job_def.jobType == _meta.JobType.JOB_TYPE_NOT_SET:
|
178
|
+
job_def = self._process_job_type(job_def)
|
179
|
+
|
180
|
+
# Load and populate any models provided as a Python class or class name
|
181
|
+
job_config, job_def = self._process_models(job_config, job_def, model_class)
|
182
|
+
|
183
|
+
# For flows, load external flow definitions then perform auto-wiring and type inference
|
184
|
+
if job_def.jobType == _meta.JobType.RUN_FLOW:
|
185
|
+
job_config, job_def = self._process_flow_definition(job_config, job_def)
|
186
|
+
|
187
|
+
if job_def.jobType == _meta.JobType.JOB_GROUP:
|
188
|
+
job_config, job_def = self.translate_job_group(job_config, job_def)
|
189
|
+
|
190
|
+
# Apply processing to the parameters, inputs and outputs
|
191
|
+
job_config, job_def = self._process_parameters(job_config, job_def)
|
192
|
+
job_config, job_def = self._process_inputs_and_outputs(job_config, job_def)
|
193
|
+
|
194
|
+
return job_config, job_def
|
195
|
+
|
196
|
+
def translate_job_group(
|
197
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
|
198
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
199
|
+
|
200
|
+
job_group = job_def.jobGroup
|
201
|
+
|
202
|
+
if job_group.jobGroupType is None or job_group.jobGroupType == _meta.JobGroupType.JOB_GROUP_TYPE_NOT_SET:
|
203
|
+
job_group = self._process_job_group_type(job_group)
|
204
|
+
|
205
|
+
group_details = self._get_job_group_detail(job_group)
|
206
|
+
|
207
|
+
if hasattr(group_details, "jobs"):
|
208
|
+
child_jobs = []
|
209
|
+
for child_def in group_details.jobs:
|
210
|
+
job_config, child_def = self.translate_job_def(job_config, child_def)
|
211
|
+
child_jobs.append(child_def)
|
212
|
+
group_details.jobs = child_jobs
|
213
|
+
|
214
|
+
job_def.jobGroup = job_group
|
215
|
+
|
216
|
+
return job_config, job_def
|
155
217
|
|
156
218
|
@classmethod
|
157
219
|
def _add_job_resource(
|
@@ -177,101 +239,153 @@ class DevModeTranslator:
|
|
177
239
|
return translated_config
|
178
240
|
|
179
241
|
@classmethod
|
180
|
-
def _process_job_type(cls,
|
242
|
+
def _process_job_type(cls, job_def: _meta.JobDefinition):
|
181
243
|
|
182
|
-
if
|
244
|
+
if job_def.runModel is not None:
|
183
245
|
job_type = _meta.JobType.RUN_MODEL
|
184
246
|
|
185
|
-
elif
|
247
|
+
elif job_def.runFlow is not None:
|
186
248
|
job_type = _meta.JobType.RUN_FLOW
|
187
249
|
|
188
|
-
elif
|
250
|
+
elif job_def.importModel is not None:
|
189
251
|
job_type = _meta.JobType.IMPORT_MODEL
|
190
252
|
|
253
|
+
elif job_def.importData is not None:
|
254
|
+
job_type = _meta.JobType.IMPORT_DATA
|
255
|
+
|
256
|
+
elif job_def.exportData is not None:
|
257
|
+
job_type = _meta.JobType.EXPORT_DATA
|
258
|
+
|
259
|
+
elif job_def.jobGroup is not None:
|
260
|
+
job_type = _meta.JobType.JOB_GROUP
|
261
|
+
|
191
262
|
else:
|
192
263
|
cls._log.error("Could not infer job type")
|
193
264
|
raise _ex.EConfigParse("Could not infer job type")
|
194
265
|
|
195
266
|
cls._log.info(f"Inferred job type = [{job_type.name}]")
|
196
267
|
|
197
|
-
job_def = copy.copy(
|
268
|
+
job_def = copy.copy(job_def)
|
198
269
|
job_def.jobType = job_type
|
199
270
|
|
200
|
-
|
201
|
-
job_config.job = job_def
|
202
|
-
|
203
|
-
return job_config
|
271
|
+
return job_def
|
204
272
|
|
205
273
|
@classmethod
|
206
|
-
def
|
207
|
-
cls,
|
208
|
-
sys_config: _cfg.RuntimeConfig,
|
209
|
-
job_config: _cfg.JobConfig,
|
210
|
-
scratch_dir: pathlib.Path,
|
211
|
-
model_class: tp.Optional[_api.TracModel.__class__]) \
|
212
|
-
-> _cfg.JobConfig:
|
274
|
+
def _process_job_group_type(cls, job_group: _meta.JobGroup) -> _meta.JobGroup:
|
213
275
|
|
214
|
-
|
215
|
-
|
276
|
+
if job_group.sequential is not None:
|
277
|
+
job_group_type = _meta.JobGroupType.SEQUENTIAL_JOB_GROUP
|
216
278
|
|
217
|
-
|
279
|
+
elif job_group.parallel is not None:
|
280
|
+
job_group_type = _meta.JobGroupType.PARALLEL_JOB_GROUP
|
218
281
|
|
219
|
-
|
220
|
-
|
221
|
-
|
282
|
+
else:
|
283
|
+
cls._log.error("Could not infer job group type")
|
284
|
+
raise _ex.EConfigParse("Could not infer job group type")
|
222
285
|
|
223
|
-
|
286
|
+
cls._log.info(f"Inferred job group type = [{job_group_type.name}]")
|
224
287
|
|
225
|
-
|
288
|
+
job_group = copy.copy(job_group)
|
289
|
+
job_group.jobGroupType = job_group_type
|
226
290
|
|
227
|
-
|
228
|
-
if model_class is not None:
|
229
|
-
model_id, model_obj = cls._generate_model_for_class(model_loader, model_class)
|
230
|
-
job_config = cls._add_job_resource(job_config, model_id, model_obj)
|
231
|
-
job_config.job.runModel.model = _util.selector_for(model_id)
|
291
|
+
return job_group
|
232
292
|
|
233
|
-
|
234
|
-
|
235
|
-
model_detail = original_config.job.runModel.model
|
236
|
-
model_id, model_obj = cls._generate_model_for_entry_point(model_loader, model_detail) # noqa
|
237
|
-
job_config = cls._add_job_resource(job_config, model_id, model_obj)
|
238
|
-
job_config.job.runModel.model = _util.selector_for(model_id)
|
293
|
+
@classmethod
|
294
|
+
def _get_job_detail(cls, job_def: _meta.JobDefinition):
|
239
295
|
|
240
|
-
if
|
296
|
+
if job_def.jobType == _meta.JobType.RUN_MODEL:
|
297
|
+
return job_def.runModel
|
241
298
|
|
242
|
-
|
243
|
-
|
299
|
+
if job_def.jobType == _meta.JobType.RUN_FLOW:
|
300
|
+
return job_def.runFlow
|
244
301
|
|
245
|
-
|
302
|
+
if job_def.jobType == _meta.JobType.IMPORT_MODEL:
|
303
|
+
return job_def.importModel
|
246
304
|
|
247
|
-
|
248
|
-
|
249
|
-
model_id, model_obj = cls._generate_model_for_entry_point(model_loader, model_detail)
|
250
|
-
job_config = cls._add_job_resource(job_config, model_id, model_obj)
|
251
|
-
job_config.job.runFlow.models[model_key] = _util.selector_for(model_id)
|
305
|
+
if job_def.jobType == _meta.JobType.IMPORT_DATA:
|
306
|
+
return job_def.importData
|
252
307
|
|
253
|
-
|
308
|
+
if job_def.jobType == _meta.JobType.EXPORT_DATA:
|
309
|
+
return job_def.exportData
|
254
310
|
|
255
|
-
|
311
|
+
if job_def.jobType == _meta.JobType.JOB_GROUP:
|
312
|
+
return job_def.jobGroup
|
313
|
+
|
314
|
+
raise _ex.EConfigParse(f"Could not get job details for job type [{job_def.jobType}]")
|
256
315
|
|
257
316
|
@classmethod
|
317
|
+
def _get_job_group_detail(cls, job_group: _meta.JobGroup):
|
318
|
+
|
319
|
+
if job_group.jobGroupType == _meta.JobGroupType.SEQUENTIAL_JOB_GROUP:
|
320
|
+
return job_group.sequential
|
321
|
+
|
322
|
+
if job_group.jobGroupType == _meta.JobGroupType.PARALLEL_JOB_GROUP:
|
323
|
+
return job_group.parallel
|
324
|
+
|
325
|
+
raise _ex.EConfigParse(f"Could not get job group details for group type [{job_group.jobGroupType}]")
|
326
|
+
|
327
|
+
def _process_models(
|
328
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition,
|
329
|
+
model_class: tp.Optional[_api.TracModel.__class__]) \
|
330
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
331
|
+
|
332
|
+
# This processing works on the assumption that job details follow a convention for addressing models
|
333
|
+
# Jobs requiring a single model have a field called "model"
|
334
|
+
# Jobs requiring multiple models have a field called "models", which is a dict
|
335
|
+
|
336
|
+
job_detail = self._get_job_detail(job_def)
|
337
|
+
|
338
|
+
# If a model class is supplied in code, use that to generate the model def
|
339
|
+
if model_class is not None:
|
340
|
+
|
341
|
+
# Passing a model class via launch_model() is only supported for job types with a single model
|
342
|
+
if not hasattr(job_detail, "model"):
|
343
|
+
raise _ex.EJobValidation(f"Job type [{job_def.jobType}] cannot be launched using launch_model()")
|
344
|
+
|
345
|
+
model_id, model_obj = self._generate_model_for_class(model_class)
|
346
|
+
job_detail.model = _util.selector_for(model_id)
|
347
|
+
job_config = self._add_job_resource(job_config, model_id, model_obj)
|
348
|
+
|
349
|
+
# Otherwise look for models specified as a single string, and take that as the entry point
|
350
|
+
else:
|
351
|
+
|
352
|
+
# Jobs with a single model
|
353
|
+
if hasattr(job_detail, "model") and isinstance(job_detail.model, str):
|
354
|
+
model_id, model_obj = self._generate_model_for_entry_point(job_detail.model) # noqa
|
355
|
+
job_detail.model = _util.selector_for(model_id)
|
356
|
+
job_config = self._add_job_resource(job_config, model_id, model_obj)
|
357
|
+
|
358
|
+
elif hasattr(job_detail, "model") and isinstance(job_detail.model, _meta.TagSelector):
|
359
|
+
if job_detail.model.objectType == _meta.ObjectType.OBJECT_TYPE_NOT_SET:
|
360
|
+
error = f"Missing required property [model] for job type [{job_def.jobType.name}]"
|
361
|
+
self._log.error(error)
|
362
|
+
raise _ex.EJobValidation(error)
|
363
|
+
|
364
|
+
# Jobs with multiple models
|
365
|
+
elif hasattr(job_detail, "models") and isinstance(job_detail.models, dict):
|
366
|
+
for model_key, model_detail in job_detail.models.items():
|
367
|
+
if isinstance(model_detail, str):
|
368
|
+
model_id, model_obj = self._generate_model_for_entry_point(model_detail)
|
369
|
+
job_detail.models[model_key] = _util.selector_for(model_id)
|
370
|
+
job_config = self._add_job_resource(job_config, model_id, model_obj)
|
371
|
+
|
372
|
+
return job_config, job_def
|
373
|
+
|
258
374
|
def _generate_model_for_class(
|
259
|
-
|
375
|
+
self, model_class: _api.TracModel.__class__) \
|
260
376
|
-> (_meta.TagHeader, _meta.ObjectDefinition):
|
261
377
|
|
262
378
|
model_entry_point = f"{model_class.__module__}.{model_class.__name__}"
|
379
|
+
return self._generate_model_for_entry_point(model_entry_point)
|
263
380
|
|
264
|
-
return cls._generate_model_for_entry_point(model_loader, model_entry_point)
|
265
|
-
|
266
|
-
@classmethod
|
267
381
|
def _generate_model_for_entry_point(
|
268
|
-
|
382
|
+
self, model_entry_point: str) \
|
269
383
|
-> (_meta.TagHeader, _meta.ObjectDefinition):
|
270
384
|
|
271
385
|
model_id = _util.new_object_id(_meta.ObjectType.MODEL)
|
272
386
|
model_key = _util.object_key(model_id)
|
273
387
|
|
274
|
-
|
388
|
+
self._log.info(f"Generating model definition for [{model_entry_point}] with ID = [{model_key}]")
|
275
389
|
|
276
390
|
skeleton_modeL_def = _meta.ModelDefinition( # noqa
|
277
391
|
language="python",
|
@@ -282,8 +396,8 @@ class DevModeTranslator:
|
|
282
396
|
inputs={},
|
283
397
|
outputs={})
|
284
398
|
|
285
|
-
model_class =
|
286
|
-
model_def =
|
399
|
+
model_class = self._model_loader.load_model_class("DEV_MODE_TRANSLATION", skeleton_modeL_def)
|
400
|
+
model_def = self._model_loader.scan_model(skeleton_modeL_def, model_class)
|
287
401
|
|
288
402
|
model_object = _meta.ObjectDefinition(
|
289
403
|
objectType=_meta.ObjectType.MODEL,
|
@@ -291,56 +405,57 @@ class DevModeTranslator:
|
|
291
405
|
|
292
406
|
return model_id, model_object
|
293
407
|
|
294
|
-
|
295
|
-
|
408
|
+
def _process_flow_definition(
|
409
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
|
410
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
296
411
|
|
297
|
-
flow_details =
|
412
|
+
flow_details = job_def.runFlow.flow
|
298
413
|
|
299
414
|
# Do not apply translation if flow is specified as an object ID / selector (assume full config is supplied)
|
300
415
|
if isinstance(flow_details, _meta.TagHeader) or isinstance(flow_details, _meta.TagSelector):
|
301
|
-
return job_config
|
416
|
+
return job_config, job_def
|
302
417
|
|
303
418
|
# Otherwise, flow is specified as the path to dev-mode flow definition
|
304
419
|
if not isinstance(flow_details, str):
|
305
420
|
err = f"Invalid config value for [job.runFlow.flow]: Expected path or tag selector, got [{flow_details}])"
|
306
|
-
|
421
|
+
self._log.error(err)
|
307
422
|
raise _ex.EConfigParse(err)
|
308
423
|
|
309
424
|
flow_id = _util.new_object_id(_meta.ObjectType.FLOW)
|
310
425
|
flow_key = _util.object_key(flow_id)
|
311
426
|
|
312
|
-
|
427
|
+
self._log.info(f"Generating flow definition from [{flow_details}] with ID = [{flow_key}]")
|
313
428
|
|
314
|
-
flow_def =
|
429
|
+
flow_def = self._config_mgr.load_config_object(flow_details, _meta.FlowDefinition)
|
315
430
|
|
316
431
|
# Validate models against the flow (this could move to _impl.validation and check prod jobs as well)
|
317
|
-
|
432
|
+
self._check_models_for_flow(flow_def, job_def, job_config)
|
318
433
|
|
319
434
|
# Auto-wiring and inference only applied to externally loaded flows for now
|
320
|
-
flow_def =
|
321
|
-
flow_def =
|
435
|
+
flow_def = self._autowire_flow(flow_def, job_def, job_config)
|
436
|
+
flow_def = self._apply_type_inference(flow_def, job_def, job_config)
|
322
437
|
|
323
438
|
flow_obj = _meta.ObjectDefinition(
|
324
439
|
objectType=_meta.ObjectType.FLOW,
|
325
440
|
flow=flow_def)
|
326
441
|
|
442
|
+
job_def = copy.copy(job_def)
|
443
|
+
job_def.runFlow = copy.copy(job_def.runFlow)
|
444
|
+
job_def.runFlow.flow = _util.selector_for(flow_id)
|
445
|
+
|
327
446
|
job_config = copy.copy(job_config)
|
328
|
-
job_config.job = copy.copy(job_config.job)
|
329
|
-
job_config.job.runFlow = copy.copy(job_config.job.runFlow)
|
330
447
|
job_config.resources = copy.copy(job_config.resources)
|
448
|
+
job_config = self._add_job_resource(job_config, flow_id, flow_obj)
|
331
449
|
|
332
|
-
|
333
|
-
job_config.job.runFlow.flow = _util.selector_for(flow_id)
|
334
|
-
|
335
|
-
return job_config
|
450
|
+
return job_config, job_def
|
336
451
|
|
337
452
|
@classmethod
|
338
|
-
def _check_models_for_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
|
453
|
+
def _check_models_for_flow(cls, flow: _meta.FlowDefinition, job_def: _meta.JobDefinition, job_config: _cfg.JobConfig):
|
339
454
|
|
340
455
|
model_nodes = dict(filter(lambda n: n[1].nodeType == _meta.FlowNodeType.MODEL_NODE, flow.nodes.items()))
|
341
456
|
|
342
|
-
missing_models = list(filter(lambda m: m not in
|
343
|
-
extra_models = list(filter(lambda m: m not in model_nodes,
|
457
|
+
missing_models = list(filter(lambda m: m not in job_def.runFlow.models, model_nodes.keys()))
|
458
|
+
extra_models = list(filter(lambda m: m not in model_nodes, job_def.runFlow.models.keys()))
|
344
459
|
|
345
460
|
if any(missing_models):
|
346
461
|
error = f"Missing models in job definition: {', '.join(missing_models)}"
|
@@ -354,7 +469,7 @@ class DevModeTranslator:
|
|
354
469
|
|
355
470
|
for model_name, model_node in model_nodes.items():
|
356
471
|
|
357
|
-
model_selector =
|
472
|
+
model_selector = job_def.runFlow.models[model_name]
|
358
473
|
model_obj = _util.get_job_resource(model_selector, job_config)
|
359
474
|
|
360
475
|
model_inputs = set(model_obj.model.inputs.keys())
|
@@ -366,9 +481,9 @@ class DevModeTranslator:
|
|
366
481
|
raise _ex.EJobValidation(error)
|
367
482
|
|
368
483
|
@classmethod
|
369
|
-
def _autowire_flow(cls, flow: _meta.FlowDefinition, job_config: _cfg.JobConfig):
|
484
|
+
def _autowire_flow(cls, flow: _meta.FlowDefinition, job_def: _meta.JobDefinition, job_config: _cfg.JobConfig):
|
370
485
|
|
371
|
-
job =
|
486
|
+
job = job_def.runFlow
|
372
487
|
nodes = copy.copy(flow.nodes)
|
373
488
|
edges: tp.Dict[str, _meta.FlowEdge] = dict()
|
374
489
|
|
@@ -455,7 +570,10 @@ class DevModeTranslator:
|
|
455
570
|
return autowired_flow
|
456
571
|
|
457
572
|
@classmethod
|
458
|
-
def _apply_type_inference(
|
573
|
+
def _apply_type_inference(
|
574
|
+
cls, flow: _meta.FlowDefinition,
|
575
|
+
job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
|
576
|
+
-> _meta.FlowDefinition:
|
459
577
|
|
460
578
|
updated_flow = copy.copy(flow)
|
461
579
|
updated_flow.parameters = copy.copy(flow.parameters)
|
@@ -476,17 +594,17 @@ class DevModeTranslator:
|
|
476
594
|
|
477
595
|
if node.nodeType == _meta.FlowNodeType.PARAMETER_NODE and node_name not in flow.parameters:
|
478
596
|
targets = edges_by_source.get(node_name) or []
|
479
|
-
model_parameter = cls._infer_parameter(node_name, targets, job_config)
|
597
|
+
model_parameter = cls._infer_parameter(node_name, targets, job_def, job_config)
|
480
598
|
updated_flow.parameters[node_name] = model_parameter
|
481
599
|
|
482
600
|
if node.nodeType == _meta.FlowNodeType.INPUT_NODE and node_name not in flow.inputs:
|
483
601
|
targets = edges_by_source.get(node_name) or []
|
484
|
-
model_input = cls._infer_input_schema(node_name, targets, job_config)
|
602
|
+
model_input = cls._infer_input_schema(node_name, targets, job_def, job_config)
|
485
603
|
updated_flow.inputs[node_name] = model_input
|
486
604
|
|
487
605
|
if node.nodeType == _meta.FlowNodeType.OUTPUT_NODE and node_name not in flow.outputs:
|
488
606
|
sources = edges_by_target.get(node_name) or []
|
489
|
-
model_output = cls._infer_output_schema(node_name, sources, job_config)
|
607
|
+
model_output = cls._infer_output_schema(node_name, sources, job_def, job_config)
|
490
608
|
updated_flow.outputs[node_name] = model_output
|
491
609
|
|
492
610
|
return updated_flow
|
@@ -494,13 +612,14 @@ class DevModeTranslator:
|
|
494
612
|
@classmethod
|
495
613
|
def _infer_parameter(
|
496
614
|
cls, param_name: str, targets: tp.List[_meta.FlowSocket],
|
497
|
-
job_config: _cfg.JobConfig)
|
615
|
+
job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
|
616
|
+
-> _meta.ModelParameter:
|
498
617
|
|
499
618
|
model_params = []
|
500
619
|
|
501
620
|
for target in targets:
|
502
621
|
|
503
|
-
model_selector =
|
622
|
+
model_selector = job_def.runFlow.models.get(target.node)
|
504
623
|
model_obj = _util.get_job_resource(model_selector, job_config)
|
505
624
|
model_param = model_obj.model.parameters.get(target.socket)
|
506
625
|
model_params.append(model_param)
|
@@ -530,13 +649,14 @@ class DevModeTranslator:
|
|
530
649
|
@classmethod
|
531
650
|
def _infer_input_schema(
|
532
651
|
cls, input_name: str, targets: tp.List[_meta.FlowSocket],
|
533
|
-
job_config: _cfg.JobConfig)
|
652
|
+
job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
|
653
|
+
-> _meta.ModelInputSchema:
|
534
654
|
|
535
655
|
model_inputs = []
|
536
656
|
|
537
657
|
for target in targets:
|
538
658
|
|
539
|
-
model_selector =
|
659
|
+
model_selector = job_def.runFlow.models.get(target.node)
|
540
660
|
model_obj = _util.get_job_resource(model_selector, job_config)
|
541
661
|
model_input = model_obj.model.inputs.get(target.socket)
|
542
662
|
model_inputs.append(model_input)
|
@@ -564,13 +684,14 @@ class DevModeTranslator:
|
|
564
684
|
@classmethod
|
565
685
|
def _infer_output_schema(
|
566
686
|
cls, output_name: str, sources: tp.List[_meta.FlowSocket],
|
567
|
-
job_config: _cfg.JobConfig)
|
687
|
+
job_def: _meta.JobDefinition, job_config: _cfg.JobConfig) \
|
688
|
+
-> _meta.ModelOutputSchema:
|
568
689
|
|
569
690
|
model_outputs = []
|
570
691
|
|
571
692
|
for source in sources:
|
572
693
|
|
573
|
-
model_selector =
|
694
|
+
model_selector = job_def.runFlow.models.get(source.node)
|
574
695
|
model_obj = _util.get_job_resource(model_selector, job_config)
|
575
696
|
model_input = model_obj.model.inputs.get(source.socket)
|
576
697
|
model_outputs.append(model_input)
|
@@ -594,37 +715,38 @@ class DevModeTranslator:
|
|
594
715
|
return f"{socket.node}.{socket.socket}" if socket.socket else socket.node
|
595
716
|
|
596
717
|
@classmethod
|
597
|
-
def _process_parameters(
|
718
|
+
def _process_parameters(
|
719
|
+
cls, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
|
720
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
598
721
|
|
599
|
-
|
600
|
-
|
601
|
-
job_details = job_config.job.runModel
|
602
|
-
model_key = _util.object_key(job_details.model)
|
603
|
-
model_or_flow = job_config.resources[model_key].model
|
722
|
+
# This relies on convention for naming properties across similar job types
|
604
723
|
|
605
|
-
|
724
|
+
job_detail = cls._get_job_detail(job_def)
|
606
725
|
|
607
|
-
|
608
|
-
|
726
|
+
if hasattr(job_detail, "model"):
|
727
|
+
model_key = _util.object_key(job_detail.model)
|
728
|
+
model_or_flow = job_config.resources[model_key].model
|
729
|
+
elif hasattr(job_detail, "flow"):
|
730
|
+
flow_key = _util.object_key(job_detail.flow)
|
609
731
|
model_or_flow = job_config.resources[flow_key].flow
|
610
|
-
|
611
732
|
else:
|
612
|
-
|
733
|
+
model_or_flow = None
|
613
734
|
|
614
|
-
|
615
|
-
param_values = job_details.parameters
|
735
|
+
if model_or_flow is not None:
|
616
736
|
|
617
|
-
|
618
|
-
|
737
|
+
param_specs = model_or_flow.parameters
|
738
|
+
raw_values = job_detail.parameters
|
619
739
|
|
620
|
-
|
740
|
+
job_detail.parameters = cls._process_parameters_dict(param_specs, raw_values)
|
741
|
+
|
742
|
+
return job_config, job_def
|
621
743
|
|
622
744
|
@classmethod
|
623
745
|
def _process_parameters_dict(
|
624
746
|
cls, param_specs: tp.Dict[str, _meta.ModelParameter],
|
625
|
-
|
747
|
+
raw_values: tp.Dict[str, _meta.Value]) -> tp.Dict[str, _meta.Value]:
|
626
748
|
|
627
|
-
unknown_params = list(filter(lambda p: p not in param_specs,
|
749
|
+
unknown_params = list(filter(lambda p: p not in param_specs, raw_values))
|
628
750
|
|
629
751
|
if any(unknown_params):
|
630
752
|
msg = f"Unknown parameters cannot be translated: [{', '.join(unknown_params)}]"
|
@@ -633,7 +755,7 @@ class DevModeTranslator:
|
|
633
755
|
|
634
756
|
encoded_values = dict()
|
635
757
|
|
636
|
-
for p_name, p_value in
|
758
|
+
for p_name, p_value in raw_values.items():
|
637
759
|
|
638
760
|
if isinstance(p_value, _meta.Value):
|
639
761
|
encoded_values[p_name] = p_value
|
@@ -648,26 +770,27 @@ class DevModeTranslator:
|
|
648
770
|
|
649
771
|
return encoded_values
|
650
772
|
|
651
|
-
|
652
|
-
|
773
|
+
def _process_inputs_and_outputs(
|
774
|
+
self, job_config: _cfg.JobConfig, job_def: _meta.JobDefinition) \
|
775
|
+
-> tp.Tuple[_cfg.JobConfig, _meta.JobDefinition]:
|
776
|
+
|
777
|
+
job_detail = self._get_job_detail(job_def)
|
653
778
|
|
654
|
-
if
|
655
|
-
|
656
|
-
model_obj = _util.get_job_resource(job_details.model, job_config)
|
779
|
+
if hasattr(job_detail, "model"):
|
780
|
+
model_obj = _util.get_job_resource(job_detail.model, job_config)
|
657
781
|
required_inputs = model_obj.model.inputs
|
658
782
|
required_outputs = model_obj.model.outputs
|
659
783
|
|
660
|
-
elif
|
661
|
-
|
662
|
-
flow_obj = _util.get_job_resource(job_details.flow, job_config)
|
784
|
+
elif hasattr(job_detail, "flow"):
|
785
|
+
flow_obj = _util.get_job_resource(job_detail.flow, job_config)
|
663
786
|
required_inputs = flow_obj.flow.inputs
|
664
787
|
required_outputs = flow_obj.flow.outputs
|
665
788
|
|
666
789
|
else:
|
667
|
-
return job_config
|
790
|
+
return job_config, job_def
|
668
791
|
|
669
|
-
job_inputs =
|
670
|
-
job_outputs =
|
792
|
+
job_inputs = job_detail.inputs
|
793
|
+
job_outputs = job_detail.outputs
|
671
794
|
job_resources = job_config.resources
|
672
795
|
|
673
796
|
for input_key, input_value in job_inputs.items():
|
@@ -676,8 +799,8 @@ class DevModeTranslator:
|
|
676
799
|
model_input = required_inputs[input_key]
|
677
800
|
input_schema = model_input.schema if model_input and not model_input.dynamic else None
|
678
801
|
|
679
|
-
input_id =
|
680
|
-
|
802
|
+
input_id = self._process_input_or_output(
|
803
|
+
input_key, input_value, job_resources,
|
681
804
|
new_unique_file=False, schema=input_schema)
|
682
805
|
|
683
806
|
job_inputs[input_key] = _util.selector_for(input_id)
|
@@ -688,17 +811,16 @@ class DevModeTranslator:
|
|
688
811
|
model_output= required_outputs[output_key]
|
689
812
|
output_schema = model_output.schema if model_output and not model_output.dynamic else None
|
690
813
|
|
691
|
-
output_id =
|
692
|
-
|
814
|
+
output_id = self._process_input_or_output(
|
815
|
+
output_key, output_value, job_resources,
|
693
816
|
new_unique_file=True, schema=output_schema)
|
694
817
|
|
695
818
|
job_outputs[output_key] = _util.selector_for(output_id)
|
696
819
|
|
697
|
-
return job_config
|
820
|
+
return job_config, job_def
|
698
821
|
|
699
|
-
@classmethod
|
700
822
|
def _process_input_or_output(
|
701
|
-
|
823
|
+
self, data_key, data_value,
|
702
824
|
resources: tp.Dict[str, _meta.ObjectDefinition],
|
703
825
|
new_unique_file=False,
|
704
826
|
schema: tp.Optional[_meta.SchemaDefinition] = None) \
|
@@ -709,8 +831,8 @@ class DevModeTranslator:
|
|
709
831
|
|
710
832
|
if isinstance(data_value, str):
|
711
833
|
storage_path = data_value
|
712
|
-
storage_key =
|
713
|
-
storage_format =
|
834
|
+
storage_key = self._sys_config.storage.defaultBucket
|
835
|
+
storage_format = self.infer_format(storage_path, self._sys_config.storage)
|
714
836
|
snap_version = 1
|
715
837
|
|
716
838
|
elif isinstance(data_value, dict):
|
@@ -720,14 +842,14 @@ class DevModeTranslator:
|
|
720
842
|
if not storage_path:
|
721
843
|
raise _ex.EConfigParse(f"Invalid configuration for input [{data_key}] (missing required value 'path'")
|
722
844
|
|
723
|
-
storage_key = data_value.get("storageKey") or
|
724
|
-
storage_format = data_value.get("format") or
|
845
|
+
storage_key = data_value.get("storageKey") or self._sys_config.storage.defaultBucket
|
846
|
+
storage_format = data_value.get("format") or self.infer_format(storage_path, self._sys_config.storage)
|
725
847
|
snap_version = 1
|
726
848
|
|
727
849
|
else:
|
728
850
|
raise _ex.EConfigParse(f"Invalid configuration for input '{data_key}'")
|
729
851
|
|
730
|
-
|
852
|
+
self._log.info(f"Generating data definition for [{data_key}] with ID = [{_util.object_key(data_id)}]")
|
731
853
|
|
732
854
|
# For unique outputs, increment the snap number to find a new unique snap
|
733
855
|
# These are not incarnations, bc likely in dev mode model code and inputs are changing
|
@@ -735,7 +857,7 @@ class DevModeTranslator:
|
|
735
857
|
|
736
858
|
if new_unique_file:
|
737
859
|
|
738
|
-
x_storage_mgr = _storage.StorageManager(
|
860
|
+
x_storage_mgr = _storage.StorageManager(self._sys_config)
|
739
861
|
x_storage = x_storage_mgr.get_file_storage(storage_key)
|
740
862
|
x_orig_path = pathlib.PurePath(storage_path)
|
741
863
|
x_name = x_orig_path.name
|
@@ -752,9 +874,9 @@ class DevModeTranslator:
|
|
752
874
|
x_name = f"{x_orig_path.stem}-{snap_version}"
|
753
875
|
storage_path = str(x_orig_path.parent.joinpath(x_name))
|
754
876
|
|
755
|
-
|
877
|
+
self._log.info(f"Output for [{data_key}] will be snap version {snap_version}")
|
756
878
|
|
757
|
-
data_obj, storage_obj =
|
879
|
+
data_obj, storage_obj = self._generate_input_definition(
|
758
880
|
data_id, storage_id, storage_key, storage_path, storage_format,
|
759
881
|
snap_index=snap_version, delta_index=1, incarnation_index=1,
|
760
882
|
schema=schema)
|