tracdap-runtime 0.8.0rc2__py3-none-any.whl → 0.9.0b2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a public registry. It is provided for informational purposes only.
- tracdap/rt/_impl/core/config_parser.py +29 -3
- tracdap/rt/_impl/core/data.py +627 -40
- tracdap/rt/_impl/core/repos.py +17 -8
- tracdap/rt/_impl/core/storage.py +25 -13
- tracdap/rt/_impl/core/struct.py +254 -60
- tracdap/rt/_impl/core/util.py +125 -11
- tracdap/rt/_impl/exec/context.py +35 -8
- tracdap/rt/_impl/exec/dev_mode.py +169 -127
- tracdap/rt/_impl/exec/engine.py +203 -140
- tracdap/rt/_impl/exec/functions.py +228 -263
- tracdap/rt/_impl/exec/graph.py +141 -126
- tracdap/rt/_impl/exec/graph_builder.py +428 -449
- tracdap/rt/_impl/grpc/codec.py +8 -13
- tracdap/rt/_impl/grpc/server.py +7 -7
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +25 -18
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.pyi +27 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +37 -35
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +37 -43
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +67 -63
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.py +11 -9
- tracdap/rt/_impl/grpc/tracdap/metadata/storage_pb2.pyi +11 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +23 -19
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.pyi +15 -2
- tracdap/rt/_impl/runtime.py +3 -9
- tracdap/rt/_impl/static_api.py +5 -6
- tracdap/rt/_plugins/format_csv.py +2 -2
- tracdap/rt/_plugins/repo_git.py +56 -11
- tracdap/rt/_plugins/storage_aws.py +165 -150
- tracdap/rt/_plugins/storage_azure.py +17 -11
- tracdap/rt/_plugins/storage_gcp.py +35 -18
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/model_api.py +45 -0
- tracdap/rt/config/__init__.py +7 -9
- tracdap/rt/config/common.py +3 -14
- tracdap/rt/config/job.py +17 -3
- tracdap/rt/config/platform.py +9 -32
- tracdap/rt/config/result.py +8 -4
- tracdap/rt/config/runtime.py +5 -10
- tracdap/rt/config/tenant.py +28 -0
- tracdap/rt/launch/cli.py +0 -8
- tracdap/rt/launch/launch.py +1 -3
- tracdap/rt/metadata/__init__.py +35 -35
- tracdap/rt/metadata/data.py +19 -31
- tracdap/rt/metadata/job.py +3 -1
- tracdap/rt/metadata/storage.py +9 -0
- tracdap/rt/metadata/type.py +9 -5
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/METADATA +5 -3
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/RECORD +64 -63
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/WHEEL +1 -1
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/licenses/LICENSE +0 -0
- {tracdap_runtime-0.8.0rc2.dist-info → tracdap_runtime-0.9.0b2.dist-info}/top_level.txt +0 -0
--- a/tracdap/rt/_impl/exec/functions.py
+++ b/tracdap/rt/_impl/exec/functions.py
@@ -13,16 +13,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import copy
-import datetime
 import abc
+import copy
 import io
 import pathlib
-import random
-import dataclasses as dc  # noqa
+import typing as tp
 
 import tracdap.rt.api as _api
-import tracdap.rt.metadata as meta
+import tracdap.rt.metadata as _meta
+import tracdap.rt.config as _cfg
 import tracdap.rt.exceptions as _ex
 import tracdap.rt._impl.exec.context as _ctx
 import tracdap.rt._impl.exec.graph_builder as _graph
@@ -66,7 +65,7 @@ class NodeContext:
 class NodeCallback:
 
     @abc.abstractmethod
-    def
+    def send_graph_update(self, update: _graph.GraphUpdate):
         pass
 
 
@@ -120,6 +119,9 @@ class NodeFunction(tp.Generic[_T]):
 # ----------------------------------------------------------------------------------------------------------------------
 
 
+# STATIC VALUES
+# -------------
+
 class NoopFunc(NodeFunction[None]):
 
     def __init__(self, node: NoopNode):
@@ -140,6 +142,9 @@ class StaticValueFunc(NodeFunction[_T]):
         return self.node.value
 
 
+# MAPPING OPERATIONS
+# ------------------
+
 class IdentityFunc(NodeFunction[_T]):
 
     def __init__(self, node: IdentityNode[_T]):
@@ -150,6 +155,18 @@ class IdentityFunc(NodeFunction[_T]):
         return _ctx_lookup(self.node.src_id, ctx)
 
 
+class KeyedItemFunc(NodeFunction[_T]):
+
+    def __init__(self, node: KeyedItemNode[_T]):
+        super().__init__()
+        self.node = node
+
+    def _execute(self, ctx: NodeContext) -> _T:
+        src_node_result = _ctx_lookup(self.node.src_id, ctx)
+        src_item = src_node_result.get(self.node.src_item)
+        return src_item
+
+
 class _ContextPushPopFunc(NodeFunction[Bundle[tp.Any]], abc.ABC):
 
     # This approach to context push / pop assumes all the nodes to be mapped are already available
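KeyedItemFunc moves here unchanged from later in the file: it resolves the upstream node's result in the execution context, then picks a single keyed item out of it. A minimal standalone sketch of the node-function pattern this module is built on (toy context and names, not the TRAC classes themselves):

    import abc
    import typing as tp

    _T = tp.TypeVar("_T")

    class NodeFunction(tp.Generic[_T], abc.ABC):

        # Concrete subclasses implement _execute; callers invoke the function
        def __call__(self, ctx: dict) -> _T:
            return self._execute(ctx)

        @abc.abstractmethod
        def _execute(self, ctx: dict) -> _T:
            pass

    class KeyedItemFunc(NodeFunction[_T]):

        def __init__(self, src_id: str, src_item: str):
            self.src_id = src_id        # ID of the upstream node
            self.src_item = src_item    # key within the upstream result

        def _execute(self, ctx: dict) -> _T:
            src_node_result = ctx[self.src_id]   # stand-in for _ctx_lookup
            return src_node_result.get(self.src_item)

    ctx = {"bundle_node": {"outputs": 42}}
    assert KeyedItemFunc("bundle_node", "outputs")(ctx) == 42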
@@ -195,74 +212,27 @@ class ContextPopFunc(_ContextPushPopFunc):
         super(ContextPopFunc, self).__init__(node, self._POP)
 
 
+# DATA HANDLING
+# -------------
 
-class KeyedItemFunc(NodeFunction[_T]):
-
-    def __init__(self, node: KeyedItemNode[_T]):
-        super().__init__()
-        self.node = node
-
-    def _execute(self, ctx: NodeContext) -> _T:
-        src_node_result = _ctx_lookup(self.node.src_id, ctx)
-        src_item = src_node_result.get(self.node.src_item)
-        return src_item
-
-
-class RuntimeOutputsFunc(NodeFunction[JobOutputs]):
-
-    def __init__(self, node: RuntimeOutputsNode):
-        super().__init__()
-        self.node = node
-
-    def _execute(self, ctx: NodeContext) -> JobOutputs:
-        return self.node.outputs
-
-
-class BuildJobResultFunc(NodeFunction[_config.JobResult]):
+class DataSpecFunc(NodeFunction[_data.DataSpec]):
 
-    def __init__(self, node: BuildJobResultNode):
+    def __init__(self, node: DataSpecNode):
         super().__init__()
         self.node = node
 
-    def _execute(self, ctx: NodeContext) -> _config.JobResult:
-
-        job_result = _config.JobResult()
-        job_result.jobId = self.node.job_id
-        job_result.statusCode = meta.JobStatusCode.SUCCEEDED
-
-        if self.node.result_id is not None:
-
-            result_def = meta.ResultDefinition()
-            result_def.jobId = _util.selector_for(self.node.job_id)
-            result_def.statusCode = meta.JobStatusCode.SUCCEEDED
-
-            result_key = _util.object_key(self.node.result_id)
-            result_obj = meta.ObjectDefinition(objectType=meta.ObjectType.RESULT, result=result_def)
-
-            job_result.results[result_key] = result_obj
-
-        # TODO: Handle individual failed results
-
-        for obj_key, node_id in self.node.outputs.objects.items():
-            obj_def = _ctx_lookup(node_id, ctx)
-            job_result.results[obj_key] = obj_def
-
-        for bundle_id in self.node.outputs.bundles:
-            bundle = _ctx_lookup(bundle_id, ctx)
-            job_result.results.update(bundle.items())
-
-        if self.node.runtime_outputs is not None:
-
-            runtime_outputs = _ctx_lookup(self.node.runtime_outputs, ctx)
-
-            for obj_key, node_id in runtime_outputs.objects.items():
-                obj_def = _ctx_lookup(node_id, ctx)
-                job_result.results[obj_key] = obj_def
+    def _execute(self, ctx: NodeContext) -> _data.DataSpec:
 
+        # Get the schema from runtime data
+        data_view = _ctx_lookup(self.node.data_view_id, ctx)
+        trac_schema = data_view.trac_schema
 
+        # Common logic for building a data spec is part of the data module
+        return _data.build_data_spec(
+            self.node.data_obj_id, self.node.storage_obj_id,
+            self.node.context_key, trac_schema,
+            self.node.sys_config,
+            self.node.prior_data_spec)
 
 
 class DataViewFunc(NodeFunction[_data.DataView]):
@@ -281,12 +251,12 @@ class DataViewFunc(NodeFunction[_data.DataView]):
             return _data.DataView.create_empty(root_item.object_type)
 
         # Handle file data views
-        if root_item.object_type == meta.ObjectType.FILE:
+        if root_item.object_type == _meta.ObjectType.FILE:
             return _data.DataView.for_file_item(root_item)
 
         # TODO: Generalize processing across DataView / DataItem types
 
-        if root_item.schema_type == meta.SchemaType.STRUCT:
+        if root_item.schema_type == _meta.SchemaType.STRUCT_SCHEMA:
             view = _data.DataView.for_trac_schema(self.node.schema)
             view.parts[root_part_key] = [root_item]
             return view
@@ -294,12 +264,14 @@ class DataViewFunc(NodeFunction[_data.DataView]):
         # Everything else is a regular data view
         if self.node.schema is not None and len(self.node.schema.table.fields) > 0:
             trac_schema = self.node.schema
+            data_view = _data.DataView.for_trac_schema(trac_schema)
         else:
             arrow_schema = root_item.schema
-
+            data_view = _data.DataView.for_arrow_schema(arrow_schema)
 
-        data_view = _data.
-
+        data_view = _data.DataMapping \
+            .add_item_to_view(data_view, root_part_key, root_item) \
+            .with_metadata(root_item.metadata)
 
         return data_view
 
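DataViewFunc now constructs the view explicitly for either a TRAC schema or an Arrow schema, then chains DataMapping.add_item_to_view with a new with_metadata call to carry the root item's metadata onto the view. A toy sketch of that copy-and-set chaining style (illustrative names only, not the TRAC DataView API):

    import copy
    import typing as tp

    class View:

        def __init__(self):
            self.parts: dict = {}
            self.metadata: tp.Optional[dict] = None

        def with_part(self, key: str, item: object) -> "View":
            view = copy.copy(self)                  # shallow copy, replace one field
            view.parts = {**self.parts, key: item}
            return view

        def with_metadata(self, metadata: dict) -> "View":
            view = copy.copy(self)
            view.metadata = metadata
            return view

    view = View().with_part("part-root", object()).with_metadata({"source": "job-1"})
    assert view.metadata == {"source": "job-1"}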
@@ -319,7 +291,7 @@ class DataItemFunc(NodeFunction[_data.DataItem]):
             return _data.DataItem.create_empty(data_view.object_type)
 
         # Handle file data views
-        if data_view.object_type == meta.ObjectType.FILE:
+        if data_view.object_type == _meta.ObjectType.FILE:
             return data_view.file_item
 
         # TODO: Support selecting data item described by self.node
@@ -332,117 +304,6 @@ class DataItemFunc(NodeFunction[_data.DataItem]):
         return delta
 
 
-class DataResultFunc(NodeFunction[ObjectBundle]):
-
-    def __init__(self, node: DataResultNode):
-        super().__init__()
-        self.node = node
-
-    def _execute(self, ctx: NodeContext) -> ObjectBundle:
-
-        data_spec = _ctx_lookup(self.node.data_save_id, ctx)
-
-        result_bundle = dict()
-
-        # Do not record output metadata for optional outputs that are empty
-        if data_spec.is_empty():
-            return result_bundle
-
-        if self.node.data_key is not None:
-            result_bundle[self.node.data_key] = meta.ObjectDefinition(objectType=meta.ObjectType.DATA, data=data_spec.data_def)
-
-        if self.node.file_key is not None:
-            result_bundle[self.node.file_key] = meta.ObjectDefinition(objectType=meta.ObjectType.FILE, file=data_spec.file_def)
-
-        if self.node.storage_key is not None:
-            result_bundle[self.node.storage_key] = meta.ObjectDefinition(objectType=meta.ObjectType.STORAGE, storage=data_spec.storage_def)
-
-        return result_bundle
-
-
-class DynamicDataSpecFunc(NodeFunction[_data.DataSpec]):
-
-    DATA_ITEM_TEMPLATE = "data/{}/{}/{}/snap-{:d}/delta-{:d}"
-    STORAGE_PATH_TEMPLATE = "data/{}/{}/{}/snap-{:d}/delta-{:d}-x{:0>6x}"
-
-    RANDOM = random.Random()
-    RANDOM.seed()
-
-    def __init__(self, node: DynamicDataSpecNode, storage: _storage.StorageManager):
-        super().__init__()
-        self.node = node
-        self.storage = storage
-
-    def _execute(self, ctx: NodeContext) -> _data.DataSpec:
-
-        # When data def for an output was not supplied in the job, this function creates a dynamic data spec
-
-        if self.node.prior_data_spec is not None:
-            raise _ex.ETracInternal("Data updates not supported yet")
-
-        data_view = _ctx_lookup(self.node.data_view_id, ctx)
-
-        data_id = self.node.data_obj_id
-        storage_id = self.node.storage_obj_id
-
-        # TODO: pass the object timestamp in from somewhere
-
-        # Note that datetime.utcnow() creates a datetime with no zone
-        # datetime.now(utc) creates a datetime with an explicit UTC zone
-        # The latter is more precise, also missing zones are rejected by validation
-        # (lenient validation might infer the zone, this should be limited to front-facing APIs)
-
-        object_timestamp = datetime.datetime.now(datetime.timezone.utc)
-
-        part_key = meta.PartKey("part-root", meta.PartType.PART_ROOT)
-        snap_index = 0
-        delta_index = 0
-
-        data_type = data_view.trac_schema.schemaType.name.lower()
-
-        data_item = self.DATA_ITEM_TEMPLATE.format(
-            data_type, data_id.objectId,
-            part_key.opaqueKey, snap_index, delta_index)
-
-        delta = meta.DataDefinition.Delta(delta_index, data_item)
-        snap = meta.DataDefinition.Snap(snap_index, [delta])
-        part = meta.DataDefinition.Part(part_key, snap)
-
-        data_def = meta.DataDefinition()
-        data_def.storageId = _util.selector_for_latest(storage_id)
-        data_def.schema = data_view.trac_schema
-        data_def.parts[part_key.opaqueKey] = part
-
-        storage_key = self.storage.default_storage_key()
-        storage_format = self.storage.default_storage_format()
-        storage_suffix_bytes = random.randint(0, 1 << 24)
-
-        storage_path = self.DATA_ITEM_TEMPLATE.format(
-            data_type, data_id.objectId,
-            part_key.opaqueKey, snap_index, delta_index,
-            storage_suffix_bytes)
-
-        storage_copy = meta.StorageCopy(
-            storage_key, storage_path, storage_format,
-            copyStatus=meta.CopyStatus.COPY_AVAILABLE,
-            copyTimestamp=meta.DatetimeValue(object_timestamp.isoformat()))
-
-        storage_incarnation = meta.StorageIncarnation(
-            [storage_copy],
-            incarnationIndex=0,
-            incarnationTimestamp=meta.DatetimeValue(object_timestamp.isoformat()),
-            incarnationStatus=meta.IncarnationStatus.INCARNATION_AVAILABLE)
-
-        storage_item = meta.StorageItem([storage_incarnation])
-
-        storage_def = meta.StorageDefinition()
-        storage_def.dataItems[data_item] = storage_item
-
-        # Dynamic data def will always use an embedded schema (this is no ID for an external schema)
-
-        return _data.DataSpec.create_data_spec(data_item, data_def, storage_def, schema_def=None)
-
-
 class _LoadSaveDataFunc(abc.ABC):
 
     def __init__(self, storage: _storage.StorageManager):
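DataResultFunc and DynamicDataSpecFunc are deleted outright: spec construction now lives behind _data.build_data_spec (core/data.py grows by 627 lines in this release), which the new DataSpecFunc calls for both static and dynamic outputs. In passing, the removed code formatted storage_path with DATA_ITEM_TEMPLATE rather than STORAGE_PATH_TEMPLATE, so the random storage suffix appears to have been dropped silently (str.format ignores surplus positional arguments). A standalone sketch of the path-template logic, with the two templates verbatim from the removed code and an illustrative helper around them:

    import random

    # Templates verbatim from the removed DynamicDataSpecFunc
    DATA_ITEM_TEMPLATE = "data/{}/{}/{}/snap-{:d}/delta-{:d}"
    STORAGE_PATH_TEMPLATE = "data/{}/{}/{}/snap-{:d}/delta-{:d}-x{:0>6x}"

    def make_paths(data_type, object_id, part_key, snap_index=0, delta_index=0):

        data_item = DATA_ITEM_TEMPLATE.format(
            data_type, object_id, part_key, snap_index, delta_index)

        # The random suffix distinguishes the physical copy from the logical
        # item; the removed code passed it to DATA_ITEM_TEMPLATE, which has
        # no sixth placeholder, so it never reached the path
        storage_suffix = random.randint(0, 1 << 24)
        storage_path = STORAGE_PATH_TEMPLATE.format(
            data_type, object_id, part_key, snap_index, delta_index, storage_suffix)

        return data_item, storage_path

    item, path = make_paths("table", "d2e5c0ff", "part-root")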
@@ -458,7 +319,7 @@ class _LoadSaveDataFunc(abc.ABC):
         else:
             raise _ex.EUnexpected()
 
-    def _choose_copy(self, data_item: str, storage_def: meta.StorageDefinition) -> meta.StorageCopy:
+    def _choose_copy(self, data_item: str, storage_def: _meta.StorageDefinition) -> _meta.StorageCopy:
 
         # Metadata should be checked for consistency before a job is accepted
         # An error here indicates a validation gap
@@ -469,15 +330,15 @@ class _LoadSaveDataFunc(abc.ABC):
             raise _ex.EValidationGap()
 
         incarnation = next(filter(
-            lambda i: i.incarnationStatus == meta.IncarnationStatus.INCARNATION_AVAILABLE,
+            lambda i: i.incarnationStatus == _meta.IncarnationStatus.INCARNATION_AVAILABLE,
             reversed(storage_info.incarnations)), None)
 
         if incarnation is None:
             raise _ex.EValidationGap()
 
         copy_ = next(filter(
-            lambda c: c.copyStatus == meta.CopyStatus.COPY_AVAILABLE
-            and self.storage.has_data_storage(c.storageKey),
+            lambda c: c.copyStatus == _meta.CopyStatus.COPY_AVAILABLE
+            and self.storage.has_data_storage(c.storageKey),
             incarnation.copies), None)
 
         if copy_ is None:
@@ -495,15 +356,15 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):
     def _execute(self, ctx: NodeContext) -> _data.DataItem:
 
         data_spec = self._choose_data_spec(self.node.spec_id, self.node.spec, ctx)
-        data_copy = self._choose_copy(data_spec.data_item, data_spec.storage_def)
+        data_copy = self._choose_copy(data_spec.data_item, data_spec.storage)
 
         if data_spec.object_type == _api.ObjectType.FILE:
             return self._load_file(data_copy)
 
-        elif data_spec.schema_type == _api.SchemaType.TABLE:
+        elif data_spec.schema_type == _api.SchemaType.TABLE_SCHEMA:
             return self._load_table(data_spec, data_copy)
 
-        elif data_spec.schema_type == _api.SchemaType.STRUCT:
+        elif data_spec.schema_type == _api.SchemaType.STRUCT_SCHEMA:
             return self._load_struct(data_copy)
 
         # TODO: Handle dynamic inputs, they should work for any schema type
@@ -522,12 +383,12 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):
 
     def _load_table(self, data_spec, data_copy):
 
-        trac_schema = data_spec.
+        trac_schema = data_spec.schema if data_spec.schema else data_spec.definition.schema
         arrow_schema = _data.DataMapping.trac_to_arrow_schema(trac_schema) if trac_schema else None
 
         storage_options = dict(
             (opt_key, _types.MetadataCodec.decode_value(opt_value))
-            for opt_key, opt_value in data_spec.storage_def.storageOptions.items())
+            for opt_key, opt_value in data_spec.storage.storageOptions.items())
 
         storage = self.storage.get_data_storage(data_copy.storageKey)
@@ -535,7 +396,9 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):
             data_copy.storagePath, data_copy.storageFormat, arrow_schema,
             storage_options=storage_options)
 
-        return _data.DataItem.for_table(table, table.schema, trac_schema)
+        return _data.DataItem \
+            .for_table(table, table.schema, trac_schema) \
+            .with_metadata(data_spec.metadata)
 
     def _load_struct(self, data_copy):
 
@@ -561,7 +424,7 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
 
         # Metadata already exists as data_spec but may not contain schema, row count, file size etc.
         data_spec = self._choose_data_spec(self.node.spec_id, self.node.spec, ctx)
-        data_copy = self._choose_copy(data_spec.data_item, data_spec.storage_def)
+        data_copy = self._choose_copy(data_spec.data_item, data_spec.storage)
 
         # Do not save empty outputs (optional outputs that were not produced)
         if data_item.is_empty():
@@ -570,10 +433,10 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
         if data_item.object_type == _api.ObjectType.FILE:
             return self._save_file(data_item, data_spec, data_copy)
 
-        elif data_item.schema_type == _api.SchemaType.TABLE:
+        elif data_item.schema_type == _api.SchemaType.TABLE_SCHEMA:
             return self._save_table(data_item, data_spec, data_copy)
 
-        elif data_item.schema_type == _api.SchemaType.STRUCT:
+        elif data_item.schema_type == _api.SchemaType.STRUCT_SCHEMA:
             return self._save_struct(data_item, data_spec, data_copy)
 
         else:
@@ -588,7 +451,7 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
         storage.write_bytes(data_copy.storagePath, data_item.content)
 
         data_spec = copy.deepcopy(data_spec)
-        data_spec.file_def.size = len(data_item.content)
+        data_spec.definition.size = len(data_item.content)
 
         return data_spec
 
@@ -603,7 +466,7 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
 
         # Decode options (metadata values) from the storage definition
         options = dict()
-        for opt_key, opt_value in data_spec.storage_def.storageOptions.items():
+        for opt_key, opt_value in data_spec.storage.storageOptions.items():
             options[opt_key] = _types.MetadataCodec.decode_value(opt_value)
 
         storage = self.storage.get_data_storage(data_copy.storageKey)
@@ -615,8 +478,8 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
         data_spec = copy.deepcopy(data_spec)
         # TODO: Save row count in metadata
 
-        if data_spec.data_def.schema is None and data_spec.data_def.schemaId is None:
-            data_spec.data_def.schema = _data.DataMapping.arrow_to_trac_schema(data_item.table.schema)
+        if data_spec.definition.schema is None and data_spec.definition.schemaId is None:
+            data_spec.definition.schema = _data.DataMapping.arrow_to_trac_schema(data_item.table.schema)
 
         return data_spec
 
@@ -638,38 +501,48 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
 
         data_spec = copy.deepcopy(data_spec)
 
-        if data_spec.data_def.schema is None and data_spec.data_def.schemaId is None:
-            data_spec.data_def.schema = data_item.trac_schema
+        if data_spec.definition.schema is None and data_spec.definition.schemaId is None:
+            data_spec.definition.schema = data_item.trac_schema
 
         return data_spec
 
-def _model_def_for_import(import_details: meta.ImportModelJob):
-
-    return meta.ModelDefinition(
-        language=import_details.language,
-        repository=import_details.repository,
-        packageGroup=import_details.packageGroup,
-        package=import_details.package,
-        version=import_details.version,
-        entryPoint=import_details.entryPoint,
-        path=import_details.path)
 
+# MODEL EXECUTION
+# ---------------
 
-class ImportModelFunc(NodeFunction[meta.ObjectDefinition]):
+class ImportModelFunc(NodeFunction[GraphOutput]):
 
     def __init__(self, node: ImportModelNode, models: _models.ModelLoader):
         super().__init__()
         self.node = node
         self._models = models
 
-    def _execute(self, ctx: NodeContext) -> meta.ObjectDefinition:
+    def _execute(self, ctx: NodeContext) -> GraphOutput:
 
-        model_stub = _model_def_for_import(self.node.import_details)
+        model_id = self.node.model_id
 
-        model_class = self._models.load_model_class(self.node.import_scope, model_stub)
+        model_stub = self._build_model_stub(self.node.import_details)
+        model_class = self._models.load_model_class(self.node.import_scope, model_stub)
         model_def = self._models.scan_model(model_stub, model_class)
+        model_obj = _meta.ObjectDefinition(_meta.ObjectType.MODEL, model=model_def)
+
+        model_attrs = [
+            _meta.TagUpdate(_meta.TagOperation.CREATE_OR_REPLACE_ATTR, attr_name, attr_value)
+            for attr_name, attr_value in model_def.staticAttributes.items()]
+
+        return GraphOutput(model_id, model_obj, model_attrs)
+
+    @staticmethod
+    def _build_model_stub(import_details: _meta.ImportModelJob):
 
-        return
+        return _meta.ModelDefinition(
+            language=import_details.language,
+            repository=import_details.repository,
+            packageGroup=import_details.packageGroup,
+            package=import_details.package,
+            version=import_details.version,
+            entryPoint=import_details.entryPoint,
+            path=import_details.path)
 
 
 class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
@@ -709,7 +582,7 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
         storage_map = {}
 
         if self.node.storage_access:
-            write_access = True if self.node.model_def.modelType == meta.ModelType.DATA_EXPORT_MODEL else False
+            write_access = True if self.node.model_def.modelType == _meta.ModelType.DATA_EXPORT_MODEL else False
             for storage_key in self.node.storage_access:
                 if self.storage_manager.has_file_storage(storage_key, external=True):
                     storage_impl = self.storage_manager.get_file_storage(storage_key, external=True)
@@ -729,7 +602,7 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
 
         # Run the model against the mapped local context
 
-        if model_def.modelType in [meta.ModelType.DATA_IMPORT_MODEL, meta.ModelType.DATA_EXPORT_MODEL]:
+        if model_def.modelType in [_meta.ModelType.DATA_IMPORT_MODEL, _meta.ModelType.DATA_EXPORT_MODEL]:
             trac_ctx = _ctx.TracDataContextImpl(
                 self.node.model_def, self.model_class,
                 local_ctx, dynamic_outputs, storage_map,
@@ -741,7 +614,8 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
             self.checkout_directory, self.log_provider)
 
         try:
-            model = self.model_class()
+            model = object.__new__(self.model_class)
+            model.__init__()
             model.run_model(trac_ctx)
         except _ex.ETrac:
             raise
@@ -750,56 +624,152 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
             msg = f"There was an unhandled error in the model: {str(e)}{details}"
             raise _ex.EModelExec(msg) from e
 
-        #
-
-        model_name = self.model_class.__name__
+        # Buidl a result bundle for the defined model outputs
         results: Bundle[_data.DataView] = dict()
-        new_nodes = dict()
-        new_deps = dict()
 
         for output_name, output_schema in model_def.outputs.items():
-
-            elif result is None:
-                result = _data.DataView.create_empty()
-
-                raise _ex.ERuntimeValidation(f"No data provided for [{output_name}] from model [{model_name}]")
-
-            result_node = BundleItemNode(result_node_id, self.node.id, output_name)
-
-            new_nodes.update(output_section.nodes)
-
+            output: _data.DataView = local_ctx.get(output_name)
+            if (output is None or output.is_empty()) and not output_schema.optional:
+                raise _ex.ERuntimeValidation(f"Missing required output [{output_name}] from model [{self.model_class.__name__}]")
+            results[output_name] = output or _data.DataView.create_empty()
+
+        # Add dynamic outputs to the model result bundle
+        for output_name in dynamic_outputs:
+            output: _data.DataView = local_ctx.get(output_name)
+            if output is None or output.is_empty():
+                raise _ex.ERuntimeValidation(f"No data provided for [{output_name}] from model [{self.model_class.__name__}]")
+            results[output_name] = output
+
+        # Send a graph update to include the dynamic outputs in the job result
+        if any(dynamic_outputs):
+            builder = _graph.GraphBuilder.dynamic(self.node.graph_context)
+            update = builder.build_dynamic_outputs(self.node.id, dynamic_outputs)
+            self.node_callback.send_graph_update(update)
+
+        return results
+
+
+# RESULTS PROCESSING
+# ------------------
+
+class JobResultFunc(NodeFunction[_cfg.JobResult]):
+
+    def __init__(self, node: JobResultNode):
+        super().__init__()
+        self.node = node
+
+    def _execute(self, ctx: NodeContext) -> _cfg.JobResult:
+
+        result_def = _meta.ResultDefinition()
+        result_def.jobId = _util.selector_for(self.node.job_id)
+
+        job_result = _cfg.JobResult()
+        job_result.jobId = self.node.job_id
+        job_result.resultId = self.node.result_id
+        job_result.result = result_def
+
+        self._process_named_outputs(self.node.named_outputs, ctx, job_result)
+        self._process_unnamed_outputs(self.node.unnamed_outputs, ctx, job_result)
+
+        # TODO: Handle individual failed results
+        result_def.statusCode = _meta.JobStatusCode.SUCCEEDED
+
+        return job_result
+
+    def _process_named_outputs(self, named_outputs, ctx: NodeContext, job_result: _cfg.JobResult):
+
+        for output_name, output_id in named_outputs.items():
+
+            output = _ctx_lookup(output_id, ctx)
+
+            if output_id.result_type == GraphOutput:
+                self._process_graph_output(output_name, output, job_result)
+
+            elif output_id.result_type == _data.DataSpec:
+                self._process_data_spec(output_name, output, job_result)
+
+            else:
+                raise _ex.EUnexpected()
+
+    def _process_unnamed_outputs(self, unnamed_outputs, ctx: NodeContext, job_result: _cfg.JobResult):
+
+        for output_id in unnamed_outputs:
+
+            output = _ctx_lookup(output_id, ctx)
+
+            if output_id.result_type == GraphOutput:
+                self._process_graph_output(None, output, job_result)
+
+            elif output_id.result_type == _data.DataSpec:
+                self._process_data_spec(None, output, job_result)
+
+            else:
+                raise _ex.EUnexpected()
+
+    @staticmethod
+    def _process_graph_output(output_name: tp.Optional[str], output: GraphOutput, job_result: _cfg.JobResult):
+
+        output_key = _util.object_key(output.objectId)
+
+        job_result.objectIds.append(output.objectId)
+        job_result.objects[output_key] = output.definition
+
+        if output.attrs is not None:
+            job_result.attrs[output_key] = _cfg.JobResultAttrs(output.attrs)
+
+        if output_name is not None:
+            job_result.result.outputs[output_name] = _util.selector_for(output.objectId)
+
+    @staticmethod
+    def _process_data_spec(output_name: tp.Optional[str], data_spec: _data.DataSpec, job_result: _cfg.JobResult):
+
+        # Do not record results for optional outputs that were not produced
+        if data_spec.is_empty():
+            return
+
+        output_id = data_spec.primary_id
+        output_key = _util.object_key(output_id)
+        output_def = data_spec.definition
+
+        if data_spec.object_type == _meta.ObjectType.DATA:
+            output_obj = _meta.ObjectDefinition(data_spec.object_type, data=output_def)
+        elif data_spec.object_type == _meta.ObjectType.FILE:
+            output_obj = _meta.ObjectDefinition(data_spec.object_type, file=output_def)
+        else:
+            raise _ex.EUnexpected()
+
+        storage_id = data_spec.storage_id
+        storage_key = _util.object_key(storage_id)
+        storage_def = data_spec.storage
+        storage_obj = _meta.ObjectDefinition(objectType=_meta.ObjectType.STORAGE, storage=storage_def)
+
+        job_result.objectIds.append(output_id)
+        job_result.objectIds.append(storage_id)
+        job_result.objects[output_key] = output_obj
+        job_result.objects[storage_key] = storage_obj
+
+        # Currently, jobs do not ever produce external schemas
+
+        if output_name is not None:
+            job_result.result.outputs[output_name] = _util.selector_for(output_id)
+
+
+class DynamicOutputsFunc(NodeFunction[DynamicOutputsNode]):
+
+    def __init__(self, node: DynamicOutputsNode):
+        super().__init__()
+        self.node = node
+
+    def _execute(self, ctx: NodeContext) -> DynamicOutputsNode:
+        return self.node
+
+
+# MISC NODE TYPES
+# ---------------
+
 
-class ChildJobFunction(NodeFunction[None]):
+class ChildJobFunc(NodeFunction[None]):
 
     def __init__(self, node: ChildJobNode):
         super().__init__()
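JobResultFunc replaces the old BuildJobResultFunc / DataResultFunc pair. Each output node ID now declares a result type, and outputs are routed accordingly into the job result's objectIds, objects and attrs collections. A toy sketch of that type-routed dispatch (stand-in dataclasses; the real code compares output_id.result_type rather than using isinstance):

    import dataclasses as dc
    import typing as tp

    @dc.dataclass
    class GraphOutput:                 # stand-in for the graph module's type
        object_id: str
        definition: object
        attrs: tp.Optional[list] = None

    @dc.dataclass
    class DataSpec:                    # stand-in for _data.DataSpec
        primary_id: str
        definition: object

    def process_output(output) -> str:
        if isinstance(output, GraphOutput):
            return f"graph output {output.object_id}"
        elif isinstance(output, DataSpec):
            return f"data spec {output.primary_id}"
        else:
            raise RuntimeError("Unexpected output type")   # mirrors _ex.EUnexpected()

    assert process_output(GraphOutput("model-1", object())) == "graph output model-1"
    assert process_output(DataSpec("data-1", object())) == "data spec data-1"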
@@ -866,9 +836,6 @@ class FunctionResolver:
     def resolve_save_data(self, node: SaveDataNode):
         return SaveDataFunc(node, self._storage)
 
-    def resolve_dynamic_data_spec(self, node: DynamicDataSpecNode):
-        return DynamicDataSpecFunc(node, self._storage)
-
     def resolve_import_model_node(self, node: ImportModelNode):
         return ImportModelFunc(node, self._models)
 
@@ -884,27 +851,25 @@ class FunctionResolver:
 
     __basic_node_mapping: tp.Dict[Node.__class__, NodeFunction.__class__] = {
 
-        ContextPushNode: ContextPushFunc,
-        ContextPopNode: ContextPopFunc,
+        NoopNode: NoopFunc,
+        StaticValueNode: StaticValueFunc,
         IdentityNode: IdentityFunc,
         KeyedItemNode: KeyedItemFunc,
+        ContextPushNode: ContextPushFunc,
+        ContextPopNode: ContextPopFunc,
+        DataSpecNode: DataSpecFunc,
         DataViewNode: DataViewFunc,
         DataItemNode: DataItemFunc,
-        BuildJobResultNode: BuildJobResultFunc,
-        DataResultNode: DataResultFunc,
-        StaticValueNode: StaticValueFunc,
-        RuntimeOutputsNode: RuntimeOutputsFunc,
-        ChildJobNode: ChildJobFunction,
+        JobResultNode: JobResultFunc,
+        DynamicOutputsNode: DynamicOutputsFunc,
+        ChildJobNode: ChildJobFunc,
         BundleItemNode: NoopFunc,
-        NoopNode: NoopFunc,
-        RunModelResultNode: NoopFunc
     }
 
     __node_mapping: tp.Dict[Node.__class__, __ResolveFunc] = {
 
         LoadDataNode: resolve_load_data,
         SaveDataNode: resolve_save_data,
-        DynamicDataSpecNode: resolve_dynamic_data_spec,
         RunModelNode: resolve_run_model_node,
         ImportModelNode: resolve_import_model_node
     }
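The resolver keeps its two-level dispatch: stateless node types map directly to a function class in __basic_node_mapping, while nodes that need runtime services (storage, model loader) go through a resolve method that injects them. A minimal sketch of the pattern (illustrative names, not the TRAC classes):

    class NoopNode: ...
    class LoadDataNode: ...

    class NoopFunc:
        def __init__(self, node):
            self.node = node

    class LoadDataFunc:
        def __init__(self, node, storage):
            self.node, self.storage = node, storage

    class FunctionResolver:

        def __init__(self, storage):
            self._storage = storage

        def resolve_load_data(self, node):
            return LoadDataFunc(node, self._storage)

        # Simple nodes map straight to a class, others to a resolve method
        __basic_node_mapping = {NoopNode: NoopFunc}
        __node_mapping = {LoadDataNode: resolve_load_data}

        def resolve_node(self, node):
            basic_func = self.__basic_node_mapping.get(type(node))
            if basic_func:
                return basic_func(node)
            resolve_func = self.__node_mapping.get(type(node))
            if resolve_func:
                return resolve_func(self, node)
            raise RuntimeError(f"No function for node type {type(node).__name__}")

    resolver = FunctionResolver(storage="storage-manager")
    print(type(resolver.resolve_node(LoadDataNode())).__name__)   # LoadDataFunc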