tracdap-runtime 0.7.1__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_impl/core/__init__.py +14 -0
- tracdap/rt/_impl/{config_parser.py → core/config_parser.py} +36 -19
- tracdap/rt/_impl/{data.py → core/data.py} +136 -32
- tracdap/rt/_impl/core/logging.py +195 -0
- tracdap/rt/_impl/{models.py → core/models.py} +15 -12
- tracdap/rt/_impl/{repos.py → core/repos.py} +12 -3
- tracdap/rt/_impl/{schemas.py → core/schemas.py} +5 -5
- tracdap/rt/_impl/{shim.py → core/shim.py} +5 -4
- tracdap/rt/_impl/{storage.py → core/storage.py} +21 -10
- tracdap/rt/_impl/core/struct.py +547 -0
- tracdap/rt/_impl/{util.py → core/util.py} +1 -111
- tracdap/rt/_impl/{validation.py → core/validation.py} +99 -31
- tracdap/rt/_impl/exec/__init__.py +14 -0
- tracdap/rt/{_exec → _impl/exec}/actors.py +12 -14
- tracdap/rt/{_exec → _impl/exec}/context.py +228 -82
- tracdap/rt/{_exec → _impl/exec}/dev_mode.py +163 -81
- tracdap/rt/{_exec → _impl/exec}/engine.py +230 -105
- tracdap/rt/{_exec → _impl/exec}/functions.py +191 -100
- tracdap/rt/{_exec → _impl/exec}/graph.py +24 -36
- tracdap/rt/{_exec → _impl/exec}/graph_builder.py +252 -115
- tracdap/rt/_impl/grpc/codec.py +1 -1
- tracdap/rt/{_exec → _impl/grpc}/server.py +7 -6
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +3 -3
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2_grpc.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +40 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.pyi +62 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +32 -20
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +48 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +4 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.pyi +8 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +65 -63
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +16 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +28 -26
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +4 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.pyi +6 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +9 -7
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.pyi +12 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +18 -5
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +42 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.py → storage_pb2.py} +4 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +1 -1
- tracdap/rt/{_exec → _impl}/runtime.py +32 -18
- tracdap/rt/_impl/static_api.py +65 -37
- tracdap/rt/_plugins/format_csv.py +1 -1
- tracdap/rt/_plugins/repo_git.py +56 -11
- tracdap/rt/_plugins/storage_sql.py +1 -1
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/__init__.py +5 -24
- tracdap/rt/api/constants.py +57 -0
- tracdap/rt/api/experimental.py +32 -0
- tracdap/rt/api/hook.py +26 -7
- tracdap/rt/api/model_api.py +16 -0
- tracdap/rt/api/static_api.py +265 -127
- tracdap/rt/config/__init__.py +11 -11
- tracdap/rt/config/common.py +2 -26
- tracdap/rt/config/dynamic.py +28 -0
- tracdap/rt/config/platform.py +17 -31
- tracdap/rt/config/runtime.py +2 -0
- tracdap/rt/ext/embed.py +2 -2
- tracdap/rt/ext/plugins.py +3 -3
- tracdap/rt/launch/launch.py +12 -14
- tracdap/rt/metadata/__init__.py +28 -18
- tracdap/rt/metadata/config.py +95 -0
- tracdap/rt/metadata/data.py +40 -0
- tracdap/rt/metadata/file.py +10 -0
- tracdap/rt/metadata/job.py +16 -0
- tracdap/rt/metadata/model.py +12 -2
- tracdap/rt/metadata/object.py +9 -1
- tracdap/rt/metadata/object_id.py +6 -0
- tracdap/rt/metadata/resource.py +41 -1
- {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info}/METADATA +23 -17
- tracdap_runtime-0.8.0.dist-info/RECORD +129 -0
- {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info}/WHEEL +1 -1
- tracdap/rt/_exec/__init__.py +0 -0
- tracdap_runtime-0.7.1.dist-info/RECORD +0 -121
- /tracdap/rt/_impl/{guard_rails.py → core/guard_rails.py} +0 -0
- /tracdap/rt/_impl/{type_system.py → core/type_system.py} +0 -0
- /tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.pyi → storage_pb2.pyi} +0 -0
- /tracdap/rt/metadata/{stoarge.py → storage.py} +0 -0
- {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info/licenses}/LICENSE +0 -0
- {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info}/top_level.txt +0 -0
@@ -13,27 +13,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-
+import copy
 import datetime
 import abc
+import io
+import pathlib
 import random
 import dataclasses as dc # noqa
 
 import tracdap.rt.api as _api
 import tracdap.rt.config as _config
 import tracdap.rt.exceptions as _ex
-import tracdap.rt.
-import tracdap.rt.
-import tracdap.rt._impl.
-import tracdap.rt._impl.
-import tracdap.rt._impl.
-import tracdap.rt._impl.storage as _storage
-import tracdap.rt._impl.
-import tracdap.rt._impl.
+import tracdap.rt._impl.exec.context as _ctx
+import tracdap.rt._impl.exec.graph_builder as _graph
+import tracdap.rt._impl.core.type_system as _types
+import tracdap.rt._impl.core.data as _data
+import tracdap.rt._impl.core.logging as _logging
+import tracdap.rt._impl.core.storage as _storage
+import tracdap.rt._impl.core.struct as _struct
+import tracdap.rt._impl.core.models as _models
+import tracdap.rt._impl.core.util as _util
 
-from tracdap.rt.
-from tracdap.rt.
+from tracdap.rt._impl.exec.graph import *
+from tracdap.rt._impl.exec.graph import _T
 
 
 class NodeContext:
@@ -227,11 +229,22 @@ class BuildJobResultFunc(NodeFunction[_config.JobResult]):
         job_result.jobId = self.node.job_id
         job_result.statusCode = meta.JobStatusCode.SUCCEEDED
 
+        if self.node.result_id is not None:
+
+            result_def = meta.ResultDefinition()
+            result_def.jobId = _util.selector_for(self.node.job_id)
+            result_def.statusCode = meta.JobStatusCode.SUCCEEDED
+
+            result_key = _util.object_key(self.node.result_id)
+            result_obj = meta.ObjectDefinition(objectType=meta.ObjectType.RESULT, result=result_def)
+
+            job_result.results[result_key] = result_obj
+
         # TODO: Handle individual failed results
 
-        for
+        for obj_key, node_id in self.node.outputs.objects.items():
             obj_def = _ctx_lookup(node_id, ctx)
-            job_result.results[
+            job_result.results[obj_key] = obj_def
 
         for bundle_id in self.node.outputs.bundles:
             bundle = _ctx_lookup(bundle_id, ctx)
@@ -241,9 +254,9 @@ class BuildJobResultFunc(NodeFunction[_config.JobResult]):
 
         runtime_outputs = _ctx_lookup(self.node.runtime_outputs, ctx)
 
-        for
+        for obj_key, node_id in runtime_outputs.objects.items():
             obj_def = _ctx_lookup(node_id, ctx)
-            job_result.results[
+            job_result.results[obj_key] = obj_def
 
         for bundle_id in runtime_outputs.bundles:
             bundle = _ctx_lookup(bundle_id, ctx)
@@ -252,37 +265,6 @@ class BuildJobResultFunc(NodeFunction[_config.JobResult]):
         return job_result
 
 
-class SaveJobResultFunc(NodeFunction[None]):
-
-    def __init__(self, node: SaveJobResultNode):
-        super().__init__()
-        self.node = node
-
-    def _execute(self, ctx: NodeContext) -> None:
-
-        job_result = _ctx_lookup(self.node.job_result_id, ctx)
-
-        if not self.node.result_spec.save_result:
-            return None
-
-        job_result_format = self.node.result_spec.result_format
-        job_result_str = _cfg_p.ConfigQuoter.quote(job_result, job_result_format)
-        job_result_bytes = bytes(job_result_str, "utf-8")
-
-        job_key = _util.object_key(job_result.jobId)
-        job_result_file = f"job_result_{job_key}.{self.node.result_spec.result_format}"
-        job_result_path = pathlib \
-            .Path(self.node.result_spec.result_dir) \
-            .joinpath(job_result_file)
-
-        _util.logger_for_object(self).info(f"Saving job result to [{job_result_path}]")
-
-        with open(job_result_path, "xb") as result_stream:
-            result_stream.write(job_result_bytes)
-
-        return None
-
-
 class DataViewFunc(NodeFunction[_data.DataView]):
 
     def __init__(self, node: DataViewNode):
@@ -296,8 +278,20 @@ class DataViewFunc(NodeFunction[_data.DataView]):
 
         # Map empty item -> emtpy view (for optional inputs not supplied)
         if root_item.is_empty():
-            return _data.DataView.create_empty()
+            return _data.DataView.create_empty(root_item.object_type)
+
+        # Handle file data views
+        if root_item.object_type == meta.ObjectType.FILE:
+            return _data.DataView.for_file_item(root_item)
 
+        # TODO: Generalize processing across DataView / DataItem types
+
+        if root_item.schema_type == meta.SchemaType.STRUCT:
+            view = _data.DataView.for_trac_schema(self.node.schema)
+            view.parts[root_part_key] = [root_item]
+            return view
+
+        # Everything else is a regular data view
         if self.node.schema is not None and len(self.node.schema.table.fields) > 0:
             trac_schema = self.node.schema
         else:
@@ -322,7 +316,11 @@ class DataItemFunc(NodeFunction[_data.DataItem]):
 
         # Map empty view -> emtpy item (for optional outputs not supplied)
         if data_view.is_empty():
-            return _data.DataItem.create_empty()
+            return _data.DataItem.create_empty(data_view.object_type)
+
+        # Handle file data views
+        if data_view.object_type == meta.ObjectType.FILE:
+            return data_view.file_item
 
         # TODO: Support selecting data item described by self.node
 
@@ -342,25 +340,24 @@ class DataResultFunc(NodeFunction[ObjectBundle]):
 
     def _execute(self, ctx: NodeContext) -> ObjectBundle:
 
-
+        data_spec = _ctx_lookup(self.node.data_save_id, ctx)
 
-
-        if data_item.is_empty():
-            return {}
+        result_bundle = dict()
 
-
+        # Do not record output metadata for optional outputs that are empty
+        if data_spec.is_empty():
+            return result_bundle
 
-
-
+        if self.node.data_key is not None:
+            result_bundle[self.node.data_key] = meta.ObjectDefinition(objectType=meta.ObjectType.DATA, data=data_spec.data_def)
 
-
-
+        if self.node.file_key is not None:
+            result_bundle[self.node.file_key] = meta.ObjectDefinition(objectType=meta.ObjectType.FILE, file=data_spec.file_def)
 
-
-            self.node.
-            self.node.storage_key: storage_result}
+        if self.node.storage_key is not None:
+            result_bundle[self.node.storage_key] = meta.ObjectDefinition(objectType=meta.ObjectType.STORAGE, storage=data_spec.storage_def)
 
-        return
+        return result_bundle
 
 
 class DynamicDataSpecFunc(NodeFunction[_data.DataSpec]):
@@ -443,11 +440,7 @@ class DynamicDataSpecFunc(NodeFunction[_data.DataSpec]):
 
         # Dynamic data def will always use an embedded schema (this is no ID for an external schema)
 
-        return _data.DataSpec(
-            data_item,
-            data_def,
-            storage_def,
-            schema_def=None)
+        return _data.DataSpec.create_data_spec(data_item, data_def, storage_def, schema_def=None)
 
 
 class _LoadSaveDataFunc(abc.ABC):
@@ -455,6 +448,16 @@ class _LoadSaveDataFunc(abc.ABC):
     def __init__(self, storage: _storage.StorageManager):
         self.storage = storage
 
+    @classmethod
+    def _choose_data_spec(cls, spec_id, spec, ctx: NodeContext):
+
+        if spec_id is not None:
+            return _ctx_lookup(spec_id, ctx)
+        elif spec is not None:
+            return spec
+        else:
+            raise _ex.EUnexpected()
+
     def _choose_copy(self, data_item: str, storage_def: meta.StorageDefinition) -> meta.StorageCopy:
 
         # Metadata should be checked for consistency before a job is accepted
@@ -491,54 +494,111 @@ class LoadDataFunc( _LoadSaveDataFunc, NodeFunction[_data.DataItem],):
 
     def _execute(self, ctx: NodeContext) -> _data.DataItem:
 
-        data_spec =
+        data_spec = self._choose_data_spec(self.node.spec_id, self.node.spec, ctx)
         data_copy = self._choose_copy(data_spec.data_item, data_spec.storage_def)
-
+
+        if data_spec.object_type == _api.ObjectType.FILE:
+            return self._load_file(data_copy)
+
+        elif data_spec.schema_type == _api.SchemaType.TABLE:
+            return self._load_table(data_spec, data_copy)
+
+        elif data_spec.schema_type == _api.SchemaType.STRUCT:
+            return self._load_struct(data_copy)
+
+        # TODO: Handle dynamic inputs, they should work for any schema type
+        elif data_spec.schema_type == _api.SchemaType.SCHEMA_TYPE_NOT_SET:
+            return self._load_table(data_spec, data_copy)
+
+        else:
+            raise _ex.EUnexpected()
+
+    def _load_file(self, data_copy):
+
+        storage = self.storage.get_file_storage(data_copy.storageKey)
+        content = storage.read_bytes(data_copy.storagePath)
+
+        return _data.DataItem.for_file_content(content)
+
+    def _load_table(self, data_spec, data_copy):
 
         trac_schema = data_spec.schema_def if data_spec.schema_def else data_spec.data_def.schema
         arrow_schema = _data.DataMapping.trac_to_arrow_schema(trac_schema) if trac_schema else None
 
-
-
-
-
+        storage_options = dict(
+            (opt_key, _types.MetadataCodec.decode_value(opt_value))
+            for opt_key, opt_value in data_spec.storage_def.storageOptions.items())
+
+        storage = self.storage.get_data_storage(data_copy.storageKey)
+
+        table = storage.read_table(
+            data_copy.storagePath, data_copy.storageFormat, arrow_schema,
+            storage_options=storage_options)
+
+        return _data.DataItem.for_table(table, table.schema, trac_schema)
 
-
-            data_copy.storagePath,
-            data_copy.storageFormat,
-            arrow_schema,
-            storage_options=options)
+    def _load_struct(self, data_copy):
 
-
+        storage = self.storage.get_file_storage(data_copy.storageKey)
 
+        with storage.read_byte_stream(data_copy.storagePath) as stream:
+            with io.TextIOWrapper(stream, "utf-8") as text_stream:
+                struct = _struct.StructProcessor.load_struct(text_stream, data_copy.storageFormat)
 
-
+        return _data.DataItem.for_struct(struct)
+
+
+class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[_data.DataSpec]):
 
     def __init__(self, node: SaveDataNode, storage: _storage.StorageManager):
         super().__init__(storage)
         self.node = node
 
-    def _execute(self, ctx: NodeContext):
+    def _execute(self, ctx: NodeContext) -> _data.DataSpec:
 
         # Item to be saved should exist in the current context
         data_item = _ctx_lookup(self.node.data_item_id, ctx)
 
+        # Metadata already exists as data_spec but may not contain schema, row count, file size etc.
+        data_spec = self._choose_data_spec(self.node.spec_id, self.node.spec, ctx)
+        data_copy = self._choose_copy(data_spec.data_item, data_spec.storage_def)
+
         # Do not save empty outputs (optional outputs that were not produced)
         if data_item.is_empty():
-            return
+            return _data.DataSpec.create_empty_spec(data_item.object_type, data_item.schema_type)
 
-
-
+        if data_item.object_type == _api.ObjectType.FILE:
+            return self._save_file(data_item, data_spec, data_copy)
 
-
-
-
+        elif data_item.schema_type == _api.SchemaType.TABLE:
+            return self._save_table(data_item, data_spec, data_copy)
+
+        elif data_item.schema_type == _api.SchemaType.STRUCT:
+            return self._save_struct(data_item, data_spec, data_copy)
+
+        else:
+            raise _ex.EUnexpected()
+
+    def _save_file(self, data_item, data_spec, data_copy):
+
+        if data_item.content is None:
+            raise _ex.EUnexpected()
+
+        storage = self.storage.get_file_storage(data_copy.storageKey)
+        storage.write_bytes(data_copy.storagePath, data_item.content)
+
+        data_spec = copy.deepcopy(data_spec)
+        data_spec.file_def.size = len(data_item.content)
+
+        return data_spec
+
+    def _save_table(self, data_item, data_spec, data_copy):
 
         # Current implementation will always put an Arrow table in the data item
         # Empty tables are allowed, so explicitly check if table is None
         # Testing "if not data_item.table" will fail for empty tables
 
-        if data_item.
+        if data_item.content is None:
             raise _ex.EUnexpected()
 
         # Decode options (metadata values) from the storage definition
@@ -546,11 +606,42 @@ class SaveDataFunc(_LoadSaveDataFunc, NodeFunction[None]):
         for opt_key, opt_value in data_spec.storage_def.storageOptions.items():
             options[opt_key] = _types.MetadataCodec.decode_value(opt_value)
 
-
+        storage = self.storage.get_data_storage(data_copy.storageKey)
+        storage.write_table(
             data_copy.storagePath, data_copy.storageFormat,
-            data_item.
+            data_item.content,
             storage_options=options, overwrite=False)
 
+        data_spec = copy.deepcopy(data_spec)
+        # TODO: Save row count in metadata
+
+        if data_spec.data_def.schema is None and data_spec.data_def.schemaId is None:
+            data_spec.data_def.schema = _data.DataMapping.arrow_to_trac_schema(data_item.table.schema)
+
+        return data_spec
+
+    def _save_struct(self, data_item, data_spec, data_copy):
+
+        if data_item.content is None:
+            raise _ex.EUnexpected()
+
+        struct_data = data_item.content
+        storage_format = data_copy.storageFormat
+
+        storage = self.storage.get_file_storage(data_copy.storageKey)
+
+        # Using the text wrapper closes the stream early, which is inefficient in the data layer
+        # Supporting text IO directly from the storage API would allow working with text streams more naturally
+        with storage.write_byte_stream(data_copy.storagePath) as stream:
+            with io.TextIOWrapper(stream, "utf-8") as text_stream:
+                _struct.StructProcessor.save_struct(struct_data, text_stream, storage_format)
+
+        data_spec = copy.deepcopy(data_spec)
+
+        if data_spec.data_def.schema is None and data_spec.data_def.schemaId is None:
+            data_spec.data_def.schema = data_item.trac_schema
+
+        return data_spec
 
 def _model_def_for_import(import_details: meta.ImportModelJob):
 
@@ -571,8 +662,6 @@ class ImportModelFunc(NodeFunction[meta.ObjectDefinition]):
         self.node = node
         self._models = models
 
-        self._log = _util.logger_for_object(self)
-
     def _execute(self, ctx: NodeContext) -> meta.ObjectDefinition:
 
         model_stub = _model_def_for_import(self.node.import_details)
@@ -589,13 +678,15 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
             self, node: RunModelNode,
             model_class: _api.TracModel.__class__,
             checkout_directory: pathlib.Path,
-            storage_manager: _storage.StorageManager
+            storage_manager: _storage.StorageManager,
+            log_provider: _logging.LogProvider):
 
         super().__init__()
         self.node = node
         self.model_class = model_class
         self.checkout_directory = checkout_directory
         self.storage_manager = storage_manager
+        self.log_provider = log_provider
 
     def _execute(self, ctx: NodeContext) -> Bundle[_data.DataView]:
 
@@ -622,7 +713,7 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
         for storage_key in self.node.storage_access:
             if self.storage_manager.has_file_storage(storage_key, external=True):
                 storage_impl = self.storage_manager.get_file_storage(storage_key, external=True)
-                storage = _ctx.TracFileStorageImpl(storage_key, storage_impl, write_access, self.checkout_directory)
+                storage = _ctx.TracFileStorageImpl(storage_key, storage_impl, write_access, self.checkout_directory, self.log_provider)
                 storage_map[storage_key] = storage
             elif self.storage_manager.has_data_storage(storage_key, external=True):
                 storage_impl = self.storage_manager.get_data_storage(storage_key, external=True)
@@ -630,7 +721,7 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
                 if not isinstance(storage_impl, _storage.IDataStorageBase):
                     raise _ex.EStorageConfig(f"External storage for [{storage_key}] is using the legacy storage framework]")
                 converter = _data.DataConverter.noop()
-                storage = _ctx.TracDataStorageImpl(storage_key, storage_impl, converter, write_access, self.checkout_directory)
+                storage = _ctx.TracDataStorageImpl(storage_key, storage_impl, converter, write_access, self.checkout_directory, self.log_provider)
                 storage_map[storage_key] = storage
             else:
                 raise _ex.EStorageConfig(f"External storage is not available: [{storage_key}]")
@@ -642,12 +733,12 @@ class RunModelFunc(NodeFunction[Bundle[_data.DataView]]):
             trac_ctx = _ctx.TracDataContextImpl(
                 self.node.model_def, self.model_class,
                 local_ctx, dynamic_outputs, storage_map,
-                self.checkout_directory)
+                self.checkout_directory, self.log_provider)
         else:
             trac_ctx = _ctx.TracContextImpl(
                 self.node.model_def, self.model_class,
                 local_ctx, dynamic_outputs,
-                self.checkout_directory)
+                self.checkout_directory, self.log_provider)
 
         try:
             model = self.model_class()
@@ -750,9 +841,10 @@ class FunctionResolver:
 
     __ResolveFunc = tp.Callable[['FunctionResolver', Node[_T]], NodeFunction[_T]]
 
-    def __init__(self, models: _models.ModelLoader, storage: _storage.StorageManager):
+    def __init__(self, models: _models.ModelLoader, storage: _storage.StorageManager, log_provider: _logging.LogProvider):
         self._models = models
         self._storage = storage
+        self._log_provider = log_provider
 
     def resolve_node(self, node: Node[_T]) -> NodeFunction[_T]:
 
@@ -788,7 +880,7 @@ class FunctionResolver:
         checkout_directory = self._models.model_load_checkout_directory(node.model_scope, node.model_def)
         storage_manager = self._storage if node.storage_access else None
 
-        return RunModelFunc(node, model_class, checkout_directory, storage_manager)
+        return RunModelFunc(node, model_class, checkout_directory, storage_manager, self._log_provider)
 
     __basic_node_mapping: tp.Dict[Node.__class__, NodeFunction.__class__] = {
 
@@ -799,7 +891,6 @@ class FunctionResolver:
         DataViewNode: DataViewFunc,
         DataItemNode: DataItemFunc,
         BuildJobResultNode: BuildJobResultFunc,
-        SaveJobResultNode: SaveJobResultFunc,
         DataResultNode: DataResultFunc,
         StaticValueNode: StaticValueFunc,
         RuntimeOutputsNode: RuntimeOutputsFunc,
@@ -13,11 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pathlib
 import typing as tp
 import dataclasses as dc
 
-import tracdap.rt._impl.data as _data
+import tracdap.rt._impl.core.data as _data
 import tracdap.rt.metadata as meta
 import tracdap.rt.config as cfg
 
@@ -182,15 +181,6 @@ class JobOutputs:
     bundles: tp.List[NodeId[ObjectBundle]] = dc.field(default_factory=list)
 
 
-# TODO: Where does this go?
-@dc.dataclass(frozen=True)
-class JobResultSpec:
-
-    save_result: bool = False
-    result_dir: tp.Union[str, pathlib.Path] = None
-    result_format: str = None
-
-
 # ----------------------------------------------------------------------------------------------------------------------
 # NODE DEFINITIONS
 # ----------------------------------------------------------------------------------------------------------------------
@@ -309,20 +299,18 @@ class DataItemNode(MappingNode[_data.DataItem]):
 @_node_type
 class DataResultNode(Node[ObjectBundle]):
 
+    # TODO: Remove this node type
+    # Either produce metadata in SaveDataNode, or handle DataSpec outputs in result processing nodes
+
     output_name: str
-
-    data_spec_id: NodeId[_data.DataSpec]
-    data_save_id: NodeId[type(None)]
+    data_save_id: NodeId[_data.DataSpec]
 
-    data_key: str
-
+    data_key: str = None
+    file_key: str = None
+    storage_key: str = None
 
     def _node_dependencies(self) -> tp.Dict[NodeId, DependencyType]:
-
-        return {
-            self.data_item_id: DependencyType.HARD,
-            self.data_spec_id: DependencyType.HARD,
-            self.data_save_id: DependencyType.HARD}
+        return {self.data_save_id: DependencyType.HARD}
 
 
 @_node_type
@@ -333,24 +321,33 @@ class LoadDataNode(Node[_data.DataItem]):
     The latest incarnation of the item will be loaded from any available copy
     """
 
-    spec_id: NodeId[_data.DataSpec]
+    spec_id: tp.Optional[NodeId[_data.DataSpec]] = None
+    spec: tp.Optional[_data.DataSpec] = None
 
     def _node_dependencies(self) -> tp.Dict[NodeId, DependencyType]:
-
+        deps = dict()
+        if self.spec_id is not None:
+            deps[self.spec_id] = DependencyType.HARD
+        return deps
 
 
 @_node_type
-class SaveDataNode(Node[
+class SaveDataNode(Node[_data.DataSpec]):
 
     """
     Save an individual data item to storage
     """
 
-    spec_id: NodeId[_data.DataSpec]
     data_item_id: NodeId[_data.DataItem]
-
+
+    spec_id: tp.Optional[NodeId[_data.DataSpec]] = None
+    spec: tp.Optional[_data.DataSpec] = None
+
     def _node_dependencies(self) -> tp.Dict[NodeId, DependencyType]:
-
+        deps = {self.data_item_id: DependencyType.HARD}
+        if self.spec_id is not None:
+            deps[self.spec_id] = DependencyType.HARD
+        return deps
 
 
 @_node_type
@@ -395,6 +392,7 @@ class RuntimeOutputsNode(Node[JobOutputs]):
 @_node_type
 class BuildJobResultNode(Node[cfg.JobResult]):
 
+    result_id: meta.TagHeader
     job_id: meta.TagHeader
 
     outputs: JobOutputs
@@ -407,16 +405,6 @@ class BuildJobResultNode(Node[cfg.JobResult]):
         return {node_id: DependencyType.HARD for node_id in dep_ids}
 
 
-@_node_type
-class SaveJobResultNode(Node[None]):
-
-    job_result_id: NodeId[cfg.JobResult]
-    result_spec: JobResultSpec
-
-    def _node_dependencies(self) -> tp.Dict[NodeId, DependencyType]:
-        return {self.job_result_id: DependencyType.HARD}
-
-
 @_node_type
 class ChildJobNode(Node[cfg.JobResult]):
 