tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b1__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- tracdap/rt/_impl/core/data.py +578 -33
- tracdap/rt/_impl/core/repos.py +7 -0
- tracdap/rt/_impl/core/storage.py +10 -3
- tracdap/rt/_impl/core/util.py +54 -11
- tracdap/rt/_impl/exec/dev_mode.py +122 -100
- tracdap/rt/_impl/exec/engine.py +178 -109
- tracdap/rt/_impl/exec/functions.py +218 -257
- tracdap/rt/_impl/exec/graph.py +140 -125
- tracdap/rt/_impl/exec/graph_builder.py +411 -449
- tracdap/rt/_impl/grpc/codec.py +4 -2
- tracdap/rt/_impl/grpc/server.py +7 -7
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +1 -1
- tracdap/rt/_impl/runtime.py +8 -0
- tracdap/rt/_plugins/repo_git.py +56 -11
- tracdap/rt/_version.py +1 -1
- tracdap/rt/config/__init__.py +6 -6
- tracdap/rt/config/common.py +5 -0
- tracdap/rt/config/job.py +13 -3
- tracdap/rt/config/result.py +8 -4
- tracdap/rt/config/runtime.py +2 -0
- tracdap/rt/metadata/__init__.py +37 -36
- tracdap/rt/metadata/job.py +2 -0
- tracdap/rt/metadata/storage.py +9 -0
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/METADATA +3 -1
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/RECORD +47 -47
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/licenses/LICENSE +0 -0
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b1.dist-info}/top_level.txt +0 -0
The detailed hunks below are from tracdap/rt/_impl/exec/functions.py. Some removed lines are truncated in the diff source and appear truncated here as well.

```diff
@@ -13,16 +13,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import copy
-import datetime
 import abc
+import copy
 import io
 import pathlib
-import
-import dataclasses as dc  # noqa
+import typing as tp
 
 import tracdap.rt.api as _api
-import tracdap.rt.
+import tracdap.rt.metadata as _meta
+import tracdap.rt.config as _cfg
 import tracdap.rt.exceptions as _ex
 import tracdap.rt._impl.exec.context as _ctx
 import tracdap.rt._impl.exec.graph_builder as _graph
```
```diff
@@ -66,7 +65,7 @@ class NodeContext:
 class NodeCallback:
 
     @abc.abstractmethod
-    def
+    def send_graph_update(self, update: _graph.GraphUpdate):
         pass
 
 
```
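The new `send_graph_update` callback is how node functions feed changes back into the execution graph at runtime; `RunModelFunc` below uses it to register dynamic outputs. A minimal sketch of a conforming implementation, assuming only the abstract method shown above (the real callback lives in the engine, which changed +178 -109 in this release; `RecordingNodeCallback` is a hypothetical name):

```python
import tracdap.rt._impl.exec.graph_builder as _graph

class RecordingNodeCallback(NodeCallback):

    """Hypothetical test double: collects graph updates instead of applying them"""

    def __init__(self):
        self.updates = []

    def send_graph_update(self, update: _graph.GraphUpdate):
        # A real engine implementation would merge the update into the live graph
        self.updates.append(update)
```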
```diff
@@ -120,6 +119,9 @@ class NodeFunction(tp.Generic[_T]):
 # ----------------------------------------------------------------------------------------------------------------------
 
 
+# STATIC VALUES
+# -------------
+
 class NoopFunc(NodeFunction[None]):
 
     def __init__(self, node: NoopNode):
```
```diff
@@ -140,6 +142,9 @@ class StaticValueFunc(NodeFunction[_T]):
         return self.node.value
 
 
+# MAPPING OPERATIONS
+# ------------------
+
 class IdentityFunc(NodeFunction[_T]):
 
     def __init__(self, node: IdentityNode[_T]):
```
```diff
@@ -150,6 +155,18 @@ class IdentityFunc(NodeFunction[_T]):
         return _ctx_lookup(self.node.src_id, ctx)
 
 
+class KeyedItemFunc(NodeFunction[_T]):
+
+    def __init__(self, node: KeyedItemNode[_T]):
+        super().__init__()
+        self.node = node
+
+    def _execute(self, ctx: NodeContext) -> _T:
+        src_node_result = _ctx_lookup(self.node.src_id, ctx)
+        src_item = src_node_result.get(self.node.src_item)
+        return src_item
+
+
 class _ContextPushPopFunc(NodeFunction[Bundle[tp.Any]], abc.ABC):
 
     # This approach to context push / pop assumes all the nodes to be mapped are already available
```
```diff
@@ -195,74 +212,27 @@ class ContextPopFunc(_ContextPushPopFunc):
         super(ContextPopFunc, self).__init__(node, self._POP)
 
 
-
+# DATA HANDLING
+# -------------
 
-
-        super().__init__()
-        self.node = node
-
-    def _execute(self, ctx: NodeContext) -> _T:
-        src_node_result = _ctx_lookup(self.node.src_id, ctx)
-        src_item = src_node_result.get(self.node.src_item)
-        return src_item
-
-
-class RuntimeOutputsFunc(NodeFunction[JobOutputs]):
-
-    def __init__(self, node: RuntimeOutputsNode):
-        super().__init__()
-        self.node = node
-
-    def _execute(self, ctx: NodeContext) -> JobOutputs:
-        return self.node.outputs
-
-
-class BuildJobResultFunc(NodeFunction[_config.JobResult]):
+class DataSpecFunc(NodeFunction[_data.DataSpec]):
 
-    def __init__(self, node:
+    def __init__(self, node: DataSpecNode):
         super().__init__()
         self.node = node
 
-    def _execute(self, ctx: NodeContext) ->
-
-        job_result = _config.JobResult()
-        job_result.jobId = self.node.job_id
-        job_result.statusCode = meta.JobStatusCode.SUCCEEDED
-
-        if self.node.result_id is not None:
-
-            result_def = meta.ResultDefinition()
-            result_def.jobId = _util.selector_for(self.node.job_id)
-            result_def.statusCode = meta.JobStatusCode.SUCCEEDED
-
-            result_key = _util.object_key(self.node.result_id)
-            result_obj = meta.ObjectDefinition(objectType=meta.ObjectType.RESULT, result=result_def)
-
-            job_result.results[result_key] = result_obj
-
-        # TODO: Handle individual failed results
-
-        for obj_key, node_id in self.node.outputs.objects.items():
-            obj_def = _ctx_lookup(node_id, ctx)
-            job_result.results[obj_key] = obj_def
-
-        for bundle_id in self.node.outputs.bundles:
-            bundle = _ctx_lookup(bundle_id, ctx)
-            job_result.results.update(bundle.items())
-
-        if self.node.runtime_outputs is not None:
-
-            runtime_outputs = _ctx_lookup(self.node.runtime_outputs, ctx)
-
-            for obj_key, node_id in runtime_outputs.objects.items():
-                obj_def = _ctx_lookup(node_id, ctx)
-                job_result.results[obj_key] = obj_def
+    def _execute(self, ctx: NodeContext) -> _data.DataSpec:
 
-
-
-
+        # Get the schema from runtime data
+        data_view = _ctx_lookup(self.node.data_view_id, ctx)
+        trac_schema = data_view.trac_schema
 
-
+        # Common logic for building a data spec is part of the data module
+        return _data.build_data_spec(
+            self.node.data_obj_id, self.node.storage_obj_id,
+            self.node.context_key, trac_schema,
+            self.node.storage_config,
+            self.node.prior_data_spec)
 
 
 class DataViewFunc(NodeFunction[_data.DataView]):
```
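`DataSpecFunc` replaces the old `BuildJobResultFunc` result assembly at this point in the file and the `DynamicDataSpecFunc` removed further down; the construction logic moves into the expanded data module (core/data.py, +578 lines in this release). The helper's exact signature is not visible in this file, but from the call site above it plausibly looks like the stub below (parameter names are inferred from the call, not confirmed):

```python
# Hypothetical stub, inferred from the call in DataSpecFunc._execute()
def build_data_spec(
        data_obj_id, storage_obj_id, context_key, trac_schema,
        storage_config, prior_spec=None):

    # Expected to assemble a _data.DataSpec: the data item key, the data
    # definition (parts / snaps / deltas) and the storage definition with
    # copy locations, handling both first-time outputs and prior specs
    raise NotImplementedError("Illustrative stub only")
```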
```diff
@@ -281,12 +251,12 @@ class DataViewFunc(NodeFunction[_data.DataView]):
             return _data.DataView.create_empty(root_item.object_type)
 
         # Handle file data views
-        if root_item.object_type ==
+        if root_item.object_type == _meta.ObjectType.FILE:
             return _data.DataView.for_file_item(root_item)
 
         # TODO: Generalize processing across DataView / DataItem types
 
-        if root_item.schema_type ==
+        if root_item.schema_type == _meta.SchemaType.STRUCT:
             view = _data.DataView.for_trac_schema(self.node.schema)
             view.parts[root_part_key] = [root_item]
             return view
```
```diff
@@ -294,11 +264,11 @@ class DataViewFunc(NodeFunction[_data.DataView]):
         # Everything else is a regular data view
         if self.node.schema is not None and len(self.node.schema.table.fields) > 0:
             trac_schema = self.node.schema
+            data_view = _data.DataView.for_trac_schema(trac_schema)
         else:
             arrow_schema = root_item.schema
-
+            data_view = _data.DataView.for_arrow_schema(arrow_schema)
 
-        data_view = _data.DataView.for_trac_schema(trac_schema)
         data_view = _data.DataMapping.add_item_to_view(data_view, root_part_key, root_item)
 
         return data_view
```
```diff
@@ -319,7 +289,7 @@ class DataItemFunc(NodeFunction[_data.DataItem]):
             return _data.DataItem.create_empty(data_view.object_type)
 
         # Handle file data views
-        if data_view.object_type ==
+        if data_view.object_type == _meta.ObjectType.FILE:
             return data_view.file_item
 
         # TODO: Support selecting data item described by self.node
```
```diff
@@ -332,117 +302,6 @@ class DataItemFunc(NodeFunction[_data.DataItem]):
         return delta
 
 
-class DataResultFunc(NodeFunction[ObjectBundle]):
-
-    def __init__(self, node: DataResultNode):
-        super().__init__()
-        self.node = node
-
-    def _execute(self, ctx: NodeContext) -> ObjectBundle:
-
-        data_spec = _ctx_lookup(self.node.data_save_id, ctx)
-
-        result_bundle = dict()
-
-        # Do not record output metadata for optional outputs that are empty
-        if data_spec.is_empty():
-            return result_bundle
-
-        if self.node.data_key is not None:
-            result_bundle[self.node.data_key] = meta.ObjectDefinition(objectType=meta.ObjectType.DATA, data=data_spec.data_def)
-
-        if self.node.file_key is not None:
-            result_bundle[self.node.file_key] = meta.ObjectDefinition(objectType=meta.ObjectType.FILE, file=data_spec.file_def)
-
-        if self.node.storage_key is not None:
-            result_bundle[self.node.storage_key] = meta.ObjectDefinition(objectType=meta.ObjectType.STORAGE, storage=data_spec.storage_def)
-
-        return result_bundle
-
-
-class DynamicDataSpecFunc(NodeFunction[_data.DataSpec]):
-
-    DATA_ITEM_TEMPLATE = "data/{}/{}/{}/snap-{:d}/delta-{:d}"
-    STORAGE_PATH_TEMPLATE = "data/{}/{}/{}/snap-{:d}/delta-{:d}-x{:0>6x}"
-
-    RANDOM = random.Random()
-    RANDOM.seed()
-
-    def __init__(self, node: DynamicDataSpecNode, storage: _storage.StorageManager):
-        super().__init__()
-        self.node = node
-        self.storage = storage
-
-    def _execute(self, ctx: NodeContext) -> _data.DataSpec:
-
-        # When data def for an output was not supplied in the job, this function creates a dynamic data spec
-
-        if self.node.prior_data_spec is not None:
-            raise _ex.ETracInternal("Data updates not supported yet")
-
-        data_view = _ctx_lookup(self.node.data_view_id, ctx)
-
-        data_id = self.node.data_obj_id
-        storage_id = self.node.storage_obj_id
-
-        # TODO: pass the object timestamp in from somewhere
-
-        # Note that datetime.utcnow() creates a datetime with no zone
-        # datetime.now(utc) creates a datetime with an explicit UTC zone
-        # The latter is more precise, also missing zones are rejected by validation
-        # (lenient validation might infer the zone, this should be limited to front-facing APIs)
-
-        object_timestamp = datetime.datetime.now(datetime.timezone.utc)
-
-        part_key = meta.PartKey("part-root", meta.PartType.PART_ROOT)
-        snap_index = 0
-        delta_index = 0
-
-        data_type = data_view.trac_schema.schemaType.name.lower()
-
-        data_item = self.DATA_ITEM_TEMPLATE.format(
-            data_type, data_id.objectId,
-            part_key.opaqueKey, snap_index, delta_index)
-
-        delta = meta.DataDefinition.Delta(delta_index, data_item)
-        snap = meta.DataDefinition.Snap(snap_index, [delta])
-        part = meta.DataDefinition.Part(part_key, snap)
-
-        data_def = meta.DataDefinition()
-        data_def.storageId = _util.selector_for_latest(storage_id)
-        data_def.schema = data_view.trac_schema
-        data_def.parts[part_key.opaqueKey] = part
-
-        storage_key = self.storage.default_storage_key()
-        storage_format = self.storage.default_storage_format()
-        storage_suffix_bytes = random.randint(0, 1 << 24)
-
-        storage_path = self.DATA_ITEM_TEMPLATE.format(
-            data_type, data_id.objectId,
-            part_key.opaqueKey, snap_index, delta_index,
-            storage_suffix_bytes)
-
-        storage_copy = meta.StorageCopy(
-            storage_key, storage_path, storage_format,
-            copyStatus=meta.CopyStatus.COPY_AVAILABLE,
-            copyTimestamp=meta.DatetimeValue(object_timestamp.isoformat()))
-
-        storage_incarnation = meta.StorageIncarnation(
-            [storage_copy],
-            incarnationIndex=0,
-            incarnationTimestamp=meta.DatetimeValue(object_timestamp.isoformat()),
-            incarnationStatus=meta.IncarnationStatus.INCARNATION_AVAILABLE)
-
-        storage_item = meta.StorageItem([storage_incarnation])
-
-        storage_def = meta.StorageDefinition()
-        storage_def.dataItems[data_item] = storage_item
-
-        # Dynamic data def will always use an embedded schema (this is no ID for an external schema)
-
-        return _data.DataSpec.create_data_spec(data_item, data_def, storage_def, schema_def=None)
-
-
 class _LoadSaveDataFunc(abc.ABC):
 
     def __init__(self, storage: _storage.StorageManager):
```
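The removed `DynamicDataSpecFunc` is a useful record of TRAC's DATA metadata hierarchy: a part holds a snap, a snap holds deltas, and each delta points at a physical data item. A condensed sketch of that structure, using only the constructors that appear in the removed code above (the object ID in the item path is a placeholder):

```python
import tracdap.rt.metadata as meta

part_key = meta.PartKey("part-root", meta.PartType.PART_ROOT)
data_item = "data/table/<object-id>/part-root/snap-0/delta-0"

# One delta, inside one snap, inside the root part
delta = meta.DataDefinition.Delta(0, data_item)
snap = meta.DataDefinition.Snap(0, [delta])
part = meta.DataDefinition.Part(part_key, snap)

data_def = meta.DataDefinition()
data_def.parts[part_key.opaqueKey] = part
```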
```diff
@@ -458,7 +317,7 @@ class _LoadSaveDataFunc(abc.ABC):
         else:
             raise _ex.EUnexpected()
 
-    def _choose_copy(self, data_item: str, storage_def:
+    def _choose_copy(self, data_item: str, storage_def: _meta.StorageDefinition) -> _meta.StorageCopy:
 
         # Metadata should be checked for consistency before a job is accepted
         # An error here indicates a validation gap
```
```diff
@@ -469,15 +328,15 @@ class _LoadSaveDataFunc(abc.ABC):
             raise _ex.EValidationGap()
 
         incarnation = next(filter(
-            lambda i: i.incarnationStatus ==
+            lambda i: i.incarnationStatus == _meta.IncarnationStatus.INCARNATION_AVAILABLE,
             reversed(storage_info.incarnations)), None)
 
         if incarnation is None:
             raise _ex.EValidationGap()
 
         copy_ = next(filter(
-            lambda c: c.copyStatus ==
-
+            lambda c: c.copyStatus == _meta.CopyStatus.COPY_AVAILABLE
+            and self.storage.has_data_storage(c.storageKey),
             incarnation.copies), None)
 
         if copy_ is None:
```
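`_choose_copy` now also requires that the chosen copy sits in storage this runtime can actually reach (`has_data_storage`). The selection pattern in isolation, on plain dictionaries rather than real metadata objects (a sketch, not TRAC API):

```python
incarnations = [
    {"status": "AVAILABLE", "copies": [{"status": "AVAILABLE", "storageKey": "store1"}]},
    {"status": "EXPUNGED", "copies": []},
]

accessible = {"store1"}  # stand-in for StorageManager.has_data_storage()

# Latest available incarnation: scan newest-first, take the first match
incarnation = next(
    (i for i in reversed(incarnations) if i["status"] == "AVAILABLE"), None)

# Any available copy held in accessible storage
copy_ = next(
    (c for c in incarnation["copies"]
     if c["status"] == "AVAILABLE" and c["storageKey"] in accessible), None)

assert copy_ is not None and copy_["storageKey"] == "store1"
```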
```diff
@@ -495,7 +354,7 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):
     def _execute(self, ctx: NodeContext) -> _data.DataItem:
 
         data_spec = self._choose_data_spec(self.node.spec_id, self.node.spec, ctx)
-        data_copy = self._choose_copy(data_spec.data_item, data_spec.
+        data_copy = self._choose_copy(data_spec.data_item, data_spec.storage)
 
         if data_spec.object_type == _api.ObjectType.FILE:
             return self._load_file(data_copy)
```
```diff
@@ -522,12 +381,12 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):
 
     def _load_table(self, data_spec, data_copy):
 
-        trac_schema = data_spec.
+        trac_schema = data_spec.schema if data_spec.schema else data_spec.definition.schema
         arrow_schema = _data.DataMapping.trac_to_arrow_schema(trac_schema) if trac_schema else None
 
         storage_options = dict(
             (opt_key, _types.MetadataCodec.decode_value(opt_value))
-            for opt_key, opt_value in data_spec.
+            for opt_key, opt_value in data_spec.storage.storageOptions.items())
 
         storage = self.storage.get_data_storage(data_copy.storageKey)
 
```
```diff
@@ -561,7 +420,7 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
 
         # Metadata already exists as data_spec but may not contain schema, row count, file size etc.
         data_spec = self._choose_data_spec(self.node.spec_id, self.node.spec, ctx)
-        data_copy = self._choose_copy(data_spec.data_item, data_spec.
+        data_copy = self._choose_copy(data_spec.data_item, data_spec.storage)
 
         # Do not save empty outputs (optional outputs that were not produced)
         if data_item.is_empty():
```
```diff
@@ -588,7 +447,7 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
         storage.write_bytes(data_copy.storagePath, data_item.content)
 
         data_spec = copy.deepcopy(data_spec)
-        data_spec.
+        data_spec.definition.size = len(data_item.content)
 
         return data_spec
 
```
```diff
@@ -603,7 +462,7 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
 
         # Decode options (metadata values) from the storage definition
         options = dict()
-        for opt_key, opt_value in data_spec.
+        for opt_key, opt_value in data_spec.storage.storageOptions.items():
             options[opt_key] = _types.MetadataCodec.decode_value(opt_value)
 
         storage = self.storage.get_data_storage(data_copy.storageKey)
```
```diff
@@ -615,8 +474,8 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
         data_spec = copy.deepcopy(data_spec)
         # TODO: Save row count in metadata
 
-        if data_spec.
-        data_spec.
+        if data_spec.definition.schema is None and data_spec.definition.schemaId is None:
+            data_spec.definition.schema = _data.DataMapping.arrow_to_trac_schema(data_item.table.schema)
 
         return data_spec
 
```
```diff
@@ -638,38 +497,48 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
 
         data_spec = copy.deepcopy(data_spec)
 
-        if data_spec.
-        data_spec.
+        if data_spec.definition.schema is None and data_spec.definition.schemaId is None:
+            data_spec.definition.schema = data_item.trac_schema
 
         return data_spec
 
-def _model_def_for_import(import_details: meta.ImportModelJob):
-
-    return meta.ModelDefinition(
-        language=import_details.language,
-        repository=import_details.repository,
-        packageGroup=import_details.packageGroup,
-        package=import_details.package,
-        version=import_details.version,
-        entryPoint=import_details.entryPoint,
-        path=import_details.path)
 
+# MODEL EXECUTION
+# ---------------
 
-class ImportModelFunc(NodeFunction[
+class ImportModelFunc(NodeFunction[GraphOutput]):
 
     def __init__(self, node: ImportModelNode, models: _models.ModelLoader):
         super().__init__()
         self.node = node
         self._models = models
 
-    def _execute(self, ctx: NodeContext) ->
+    def _execute(self, ctx: NodeContext) -> GraphOutput:
 
-
+        model_id = self.node.model_id
 
-
+        model_stub = self._build_model_stub(self.node.import_details)
+        model_class = self._models.load_model_class(self.node.import_scope, model_stub)
         model_def = self._models.scan_model(model_stub, model_class)
+        model_obj = _meta.ObjectDefinition(_meta.ObjectType.MODEL, model=model_def)
+
+        model_attrs = [
+            _meta.TagUpdate(_meta.TagOperation.CREATE_OR_REPLACE_ATTR, attr_name, attr_value)
+            for attr_name, attr_value in model_def.staticAttributes.items()]
+
+        return GraphOutput(model_id, model_obj, model_attrs)
+
+    @staticmethod
+    def _build_model_stub(import_details: _meta.ImportModelJob):
 
-        return
+        return _meta.ModelDefinition(
+            language=import_details.language,
+            repository=import_details.repository,
+            packageGroup=import_details.packageGroup,
+            package=import_details.package,
+            version=import_details.version,
+            entryPoint=import_details.entryPoint,
+            path=import_details.path)
 
 
 class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
```
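`ImportModelFunc` now returns a `GraphOutput` rather than a bare object definition, so the imported model carries its ID and static attributes through to the job result. `GraphOutput` itself is defined elsewhere (likely graph.py, +140 -125 in this release); from the constructor call above and the field accesses in `JobResultFunc` below, it plausibly looks like this (a hypothetical sketch, not the real definition):

```python
import dataclasses as dc
import typing as tp

import tracdap.rt.metadata as meta

@dc.dataclass(frozen=True)
class GraphOutput:

    objectId: meta.TagHeader            # e.g. model_id in ImportModelFunc
    definition: meta.ObjectDefinition   # e.g. the scanned model definition
    attrs: tp.Optional[tp.List[meta.TagUpdate]] = None  # static attrs as tag updates
```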
```diff
@@ -709,7 +578,7 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
         storage_map = {}
 
         if self.node.storage_access:
-            write_access = True if self.node.model_def.modelType ==
+            write_access = True if self.node.model_def.modelType == _meta.ModelType.DATA_EXPORT_MODEL else False
             for storage_key in self.node.storage_access:
                 if self.storage_manager.has_file_storage(storage_key, external=True):
                     storage_impl = self.storage_manager.get_file_storage(storage_key, external=True)
```
```diff
@@ -729,7 +598,7 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
 
         # Run the model against the mapped local context
 
-        if model_def.modelType in [
+        if model_def.modelType in [_meta.ModelType.DATA_IMPORT_MODEL, _meta.ModelType.DATA_EXPORT_MODEL]:
             trac_ctx = _ctx.TracDataContextImpl(
                 self.node.model_def, self.model_class,
                 local_ctx, dynamic_outputs, storage_map,
```
```diff
@@ -741,7 +610,8 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
                 self.checkout_directory, self.log_provider)
 
         try:
-            model = self.model_class
+            model = object.__new__(self.model_class)
+            model.__init__()
             model.run_model(trac_ctx)
         except _ex.ETrac:
             raise
```
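Model instantiation also changes: instead of calling the class directly, the runtime allocates with `object.__new__` and then calls `__init__` explicitly, both inside the try block. For an ordinary class the two spellings are equivalent, but the split form bypasses any custom `__new__` on the model class and keeps a failure inside the model's constructor within the same try/except that wraps `run_model` errors as `EModelExec`. A standalone demonstration (not TRAC code):

```python
class Example:

    def __init__(self):
        self.ready = True

# Normal construction: type.__call__ runs __new__ then __init__
a = Example()

# Split form, as now used in RunModelFunc: allocate first, then initialize
b = object.__new__(Example)
b.__init__()

assert a.ready and b.ready
```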
```diff
@@ -750,56 +620,152 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
             msg = f"There was an unhandled error in the model: {str(e)}{details}"
             raise _ex.EModelExec(msg) from e
 
-        #
-
-        model_name = self.model_class.__name__
+        # Buidl a result bundle for the defined model outputs
         results: Bundle[_data.DataView] = dict()
-        new_nodes = dict()
-        new_deps = dict()
 
         for output_name, output_schema in model_def.outputs.items():
+            output: _data.DataView = local_ctx.get(output_name)
+            if (output is None or output.is_empty()) and not output_schema.optional:
+                raise _ex.ERuntimeValidation(f"Missing required output [{output_name}] from model [{self.model_class.__name__}]")
+            results[output_name] = output or _data.DataView.create_empty()
+
+        # Add dynamic outputs to the model result bundle
+        for output_name in dynamic_outputs:
+            output: _data.DataView = local_ctx.get(output_name)
+            if output is None or output.is_empty():
+                raise _ex.ERuntimeValidation(f"No data provided for [{output_name}] from model [{self.model_class.__name__}]")
+            results[output_name] = output
+
+        # Send a graph update to include the dynamic outputs in the job result
+        if any(dynamic_outputs):
+            builder = _graph.GraphBuilder.dynamic(self.node.graph_context)
+            update = builder.build_dynamic_outputs(self.node.id, dynamic_outputs)
+            self.node_callback.send_graph_update(update)
+
+        return results
+
 
-
+# RESULTS PROCESSING
+# ------------------
 
-
+class JobResultFunc(NodeFunction[_cfg.JobResult]):
 
-
-
+    def __init__(self, node: JobResultNode):
+        super().__init__()
+        self.node = node
 
-
-            elif result is None:
-                result = _data.DataView.create_empty()
+    def _execute(self, ctx: NodeContext) -> _cfg.JobResult:
 
-
+        result_def = _meta.ResultDefinition()
+        result_def.jobId = _util.selector_for(self.node.job_id)
 
-
+        job_result = _cfg.JobResult()
+        job_result.jobId = self.node.job_id
+        job_result.resultId = self.node.result_id
+        job_result.result = result_def
 
-
+        self._process_named_outputs(self.node.named_outputs, ctx, job_result)
+        self._process_unnamed_outputs(self.node.unnamed_outputs, ctx, job_result)
 
-
+        # TODO: Handle individual failed results
 
-
-                raise _ex.ERuntimeValidation(f"No data provided for [{output_name}] from model [{model_name}]")
+        result_def.statusCode = _meta.JobStatusCode.SUCCEEDED
 
-
+        return job_result
 
-
-            result_node = BundleItemNode(result_node_id, self.node.id, output_name)
+    def _process_named_outputs(self, named_outputs, ctx: NodeContext, job_result: _cfg.JobResult):
 
-
+        for output_name, output_id in named_outputs.items():
 
-
-            new_nodes.update(output_section.nodes)
+            output = _ctx_lookup(output_id, ctx)
 
-
-
+            if output_id.result_type == GraphOutput:
+                self._process_graph_output(output_name, output, job_result)
 
-
+            elif output_id.result_type == _data.DataSpec:
+                self._process_data_spec(output_name, output, job_result)
 
-
+            else:
+                raise _ex.EUnexpected()
+
+    def _process_unnamed_outputs(self, unnamed_outputs, ctx: NodeContext, job_result: _cfg.JobResult):
+
+        for output_id in unnamed_outputs:
+
+            output = _ctx_lookup(output_id, ctx)
 
+            if output_id.result_type == GraphOutput:
+                self._process_graph_output(None, output, job_result)
 
-
+            elif output_id.result_type == _data.DataSpec:
+                self._process_data_spec(None, output, job_result)
+
+            else:
+                raise _ex.EUnexpected()
+
+    @staticmethod
+    def _process_graph_output(output_name: tp.Optional[str], output: GraphOutput, job_result: _cfg.JobResult):
+
+        output_key = _util.object_key(output.objectId)
+
+        job_result.objectIds.append(output.objectId)
+        job_result.objects[output_key] = output.definition
+
+        if output.attrs is not None:
+            job_result.attrs[output_key] = _cfg.JobResultAttrs(output.attrs)
+
+        if output_name is not None:
+            job_result.result.outputs[output_name] = _util.selector_for(output.objectId)
+
+    @staticmethod
+    def _process_data_spec(output_name: tp.Optional[str], data_spec: _data.DataSpec, job_result: _cfg.JobResult):
+
+        # Do not record results for optional outputs that were not produced
+        if data_spec.is_empty():
+            return
+
+        output_id = data_spec.primary_id
+        output_key = _util.object_key(output_id)
+        output_def = data_spec.definition
+
+        if data_spec.object_type == _meta.ObjectType.DATA:
+            output_obj = _meta.ObjectDefinition(data_spec.object_type, data=output_def)
+        elif data_spec.object_type == _meta.ObjectType.FILE:
+            output_obj = _meta.ObjectDefinition(data_spec.object_type, file=output_def)
+        else:
+            raise _ex.EUnexpected()
+
+        storage_id = data_spec.storage_id
+        storage_key = _util.object_key(storage_id)
+        storage_def = data_spec.storage
+        storage_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
+
+        job_result.objectIds.append(output_id)
+        job_result.objectIds.append(storage_id)
+        job_result.objects[output_key] = output_obj
+        job_result.objects[storage_key] = storage_obj
+
+        # Currently, jobs do not ever produce external schemas
+
+        if output_name is not None:
+            job_result.result.outputs[output_name] = _util.selector_for(output_id)
+
+
+class DynamicOutputsFunc(NodeFunction[DynamicOutputsNode]):
+
+    def __init__(self, node: DynamicOutputsNode):
+        super().__init__()
+        self.node = node
+
+    def _execute(self, ctx: NodeContext) -> DynamicOutputsNode:
+        return self.node
+
+
+# MISC NODE TYPES
+# ---------------
+
+
 class ChildJobFunc(NodeFunction[None]):
 
     def __init__(self, node: ChildJobNode):
         super().__init__()
```
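Taken together, `JobResultFunc` shows how the result format changed between 0.8.x and 0.9.0: the old `BuildJobResultFunc` accumulated everything into a single `results` map, while the new code splits object IDs, definitions, attributes and named-output selectors into separate fields. A field-by-field summary, as implied by the code above (exact types live in `tracdap.rt.config` and `tracdap.rt.metadata`):

```python
# 0.9.0 JobResult shape, as used by JobResultFunc._execute() above:
#
#   job_result.jobId                  - ID of the job
#   job_result.resultId               - ID of the result object
#   job_result.result                 - meta.ResultDefinition
#       .jobId                        - selector pointing back at the job
#       .statusCode                   - e.g. JobStatusCode.SUCCEEDED
#       .outputs[output_name]         - selector per named output
#   job_result.objectIds              - list of output / storage object IDs
#   job_result.objects[object_key]    - ObjectDefinition per object key
#   job_result.attrs[object_key]      - JobResultAttrs (tag updates)
#
# 0.8.x carried a single flat map instead (see the removed BuildJobResultFunc):
#
#   job_result.results[object_key] = object_definition
```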
```diff
@@ -866,9 +832,6 @@ class FunctionResolver:
     def resolve_save_data(self, node: SaveDataNode):
        return SaveDataFunc(node, self._storage)
 
-    def resolve_dynamic_data_spec(self, node: DynamicDataSpecNode):
-        return DynamicDataSpecFunc(node, self._storage)
-
     def resolve_import_model_node(self, node: ImportModelNode):
         return ImportModelFunc(node, self._models)
 
```
```diff
@@ -884,27 +847,25 @@ class FunctionResolver:
 
     __basic_node_mapping: tp.Dict[Node.__class__, NodeFunction.__class__] = {
 
-
-
+        NoopNode: NoopFunc,
+        StaticValueNode: StaticValueFunc,
         IdentityNode: IdentityFunc,
         KeyedItemNode: KeyedItemFunc,
+        ContextPushNode: ContextPushFunc,
+        ContextPopNode: ContextPopFunc,
+        DataSpecNode: DataSpecFunc,
         DataViewNode: DataViewFunc,
         DataItemNode: DataItemFunc,
-
-
-
-        RuntimeOutputsNode: RuntimeOutputsFunc,
-        ChildJobNode: ChildJobFunction,
+        JobResultNode: JobResultFunc,
+        DynamicOutputsNode: DynamicOutputsFunc,
+        ChildJobNode: ChildJobFunc,
         BundleItemNode: NoopFunc,
-        NoopNode: NoopFunc,
-        RunModelResultNode: NoopFunc
     }
 
     __node_mapping: tp.Dict[Node.__class__, __ResolveFunc] = {
 
         LoadDataNode: resolve_load_data,
         SaveDataNode: resolve_save_data,
-        DynamicDataSpecNode: resolve_dynamic_data_spec,
         RunModelNode: resolve_run_model_node,
         ImportModelNode: resolve_import_model_node
     }
```