tracdap-runtime 0.7.0rc1__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_impl/core/__init__.py +14 -0
- tracdap/rt/_impl/{config_parser.py → core/config_parser.py} +61 -36
- tracdap/rt/_impl/{data.py → core/data.py} +136 -32
- tracdap/rt/_impl/core/logging.py +195 -0
- tracdap/rt/_impl/{models.py → core/models.py} +15 -12
- tracdap/rt/_impl/{repos.py → core/repos.py} +12 -3
- tracdap/rt/_impl/{schemas.py → core/schemas.py} +5 -5
- tracdap/rt/_impl/{shim.py → core/shim.py} +5 -4
- tracdap/rt/_impl/{storage.py → core/storage.py} +21 -10
- tracdap/rt/_impl/core/struct.py +547 -0
- tracdap/rt/_impl/{type_system.py → core/type_system.py} +73 -33
- tracdap/rt/_impl/{util.py → core/util.py} +1 -111
- tracdap/rt/_impl/{validation.py → core/validation.py} +99 -31
- tracdap/rt/_impl/exec/__init__.py +14 -0
- tracdap/rt/{_exec → _impl/exec}/actors.py +12 -14
- tracdap/rt/{_exec → _impl/exec}/context.py +228 -82
- tracdap/rt/{_exec → _impl/exec}/dev_mode.py +176 -89
- tracdap/rt/{_exec → _impl/exec}/engine.py +230 -105
- tracdap/rt/{_exec → _impl/exec}/functions.py +191 -100
- tracdap/rt/{_exec → _impl/exec}/graph.py +24 -36
- tracdap/rt/{_exec → _impl/exec}/graph_builder.py +252 -115
- tracdap/rt/_impl/grpc/codec.py +1 -1
- tracdap/rt/{_exec → _impl/grpc}/server.py +7 -6
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +3 -3
- tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2_grpc.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +40 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.pyi +62 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +32 -20
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +48 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +4 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.pyi +8 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +65 -63
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +16 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +28 -26
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +4 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.pyi +6 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +9 -7
- tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.pyi +12 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +18 -5
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +42 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.py → storage_pb2.py} +4 -4
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
- tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +1 -1
- tracdap/rt/{_exec → _impl}/runtime.py +32 -18
- tracdap/rt/_impl/static_api.py +66 -38
- tracdap/rt/_plugins/format_csv.py +1 -1
- tracdap/rt/_plugins/repo_git.py +56 -11
- tracdap/rt/_plugins/storage_sql.py +13 -6
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/__init__.py +5 -24
- tracdap/rt/api/constants.py +57 -0
- tracdap/rt/api/experimental.py +32 -0
- tracdap/rt/api/hook.py +26 -7
- tracdap/rt/api/model_api.py +16 -0
- tracdap/rt/api/static_api.py +265 -127
- tracdap/rt/config/__init__.py +11 -11
- tracdap/rt/config/common.py +2 -26
- tracdap/rt/config/dynamic.py +28 -0
- tracdap/rt/config/platform.py +17 -31
- tracdap/rt/config/runtime.py +2 -0
- tracdap/rt/ext/embed.py +2 -2
- tracdap/rt/ext/plugins.py +3 -3
- tracdap/rt/launch/launch.py +12 -14
- tracdap/rt/metadata/__init__.py +31 -21
- tracdap/rt/metadata/config.py +95 -0
- tracdap/rt/metadata/data.py +40 -0
- tracdap/rt/metadata/file.py +10 -0
- tracdap/rt/metadata/job.py +16 -0
- tracdap/rt/metadata/model.py +12 -2
- tracdap/rt/metadata/object.py +9 -1
- tracdap/rt/metadata/object_id.py +6 -0
- tracdap/rt/metadata/resource.py +41 -1
- {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0.dist-info}/METADATA +33 -27
- tracdap_runtime-0.8.0.dist-info/RECORD +129 -0
- {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0.dist-info}/WHEEL +1 -1
- tracdap/rt/_exec/__init__.py +0 -0
- tracdap_runtime-0.7.0rc1.dist-info/RECORD +0 -121
- /tracdap/rt/_impl/{guard_rails.py → core/guard_rails.py} +0 -0
- /tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.pyi → storage_pb2.pyi} +0 -0
- /tracdap/rt/metadata/{stoarge.py → storage.py} +0 -0
- {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0.dist-info/licenses}/LICENSE +0 -0
- {tracdap_runtime-0.7.0rc1.dist-info → tracdap_runtime-0.8.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
# Licensed to the Fintech Open Source Foundation (FINOS) under one or
|
2
|
+
# more contributor license agreements. See the NOTICE file distributed
|
3
|
+
# with this work for additional information regarding copyright ownership.
|
4
|
+
# FINOS licenses this file to you under the Apache License, Version 2.0
|
5
|
+
# (the "License"); you may not use this file except in compliance with the
|
6
|
+
# License. You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
@@ -13,8 +13,6 @@
|
|
13
13
|
# See the License for the specific language governing permissions and
|
14
14
|
# limitations under the License.
|
15
15
|
|
16
|
-
from __future__ import annotations
|
17
|
-
|
18
16
|
import dataclasses as _dc
|
19
17
|
import decimal
|
20
18
|
import enum
|
@@ -24,6 +22,7 @@ import json
|
|
24
22
|
import os
|
25
23
|
import pathlib
|
26
24
|
import re
|
25
|
+
import types as ts
|
27
26
|
import typing as tp
|
28
27
|
import urllib.parse as _urlp
|
29
28
|
import uuid
|
@@ -32,18 +31,24 @@ import tracdap.rt.config as _config
|
|
32
31
|
import tracdap.rt.exceptions as _ex
|
33
32
|
import tracdap.rt.ext.plugins as _plugins
|
34
33
|
import tracdap.rt.ext.config as _config_ext
|
35
|
-
import tracdap.rt._impl.
|
34
|
+
import tracdap.rt._impl.core.logging as _logging
|
35
|
+
import tracdap.rt._impl.core.util as _util
|
36
36
|
|
37
37
|
import yaml
|
38
38
|
import yaml.parser
|
39
39
|
|
40
|
+
try:
|
41
|
+
import pydantic as _pyd # noqa
|
42
|
+
except ModuleNotFoundError:
|
43
|
+
_pyd = None
|
44
|
+
|
40
45
|
_T = tp.TypeVar('_T')
|
41
46
|
|
42
47
|
|
43
48
|
class ConfigManager:
|
44
49
|
|
45
50
|
@classmethod
|
46
|
-
def for_root_config(cls, root_config_file: tp.Union[str, pathlib.Path, None]) -> ConfigManager:
|
51
|
+
def for_root_config(cls, root_config_file: tp.Union[str, pathlib.Path, None]) -> "ConfigManager":
|
47
52
|
|
48
53
|
if isinstance(root_config_file, pathlib.Path):
|
49
54
|
root_file_path = cls._resolve_scheme(root_config_file)
|
@@ -69,7 +74,7 @@ class ConfigManager:
|
|
69
74
|
return ConfigManager(working_dir_url, None)
|
70
75
|
|
71
76
|
@classmethod
|
72
|
-
def for_root_dir(cls, root_config_dir: tp.Union[str, pathlib.Path]) -> ConfigManager:
|
77
|
+
def for_root_dir(cls, root_config_dir: tp.Union[str, pathlib.Path]) -> "ConfigManager":
|
73
78
|
|
74
79
|
if isinstance(root_config_dir, pathlib.Path):
|
75
80
|
root_dir_path = cls._resolve_scheme(root_config_dir)
|
@@ -103,7 +108,7 @@ class ConfigManager:
|
|
103
108
|
return raw_url
|
104
109
|
|
105
110
|
def __init__(self, root_dir_url: _urlp.ParseResult, root_file_url: tp.Optional[_urlp.ParseResult]):
|
106
|
-
self._log =
|
111
|
+
self._log = _logging.logger_for_object(self)
|
107
112
|
self._root_dir_url = root_dir_url
|
108
113
|
self._root_file_url = root_file_url
|
109
114
|
|
@@ -279,9 +284,17 @@ class ConfigManager:
|
|
279
284
|
|
280
285
|
class ConfigParser(tp.Generic[_T]):
|
281
286
|
|
282
|
-
#
|
283
|
-
#
|
284
|
-
|
287
|
+
# Support both new and old styles for generic, union and optional types
|
288
|
+
# Old-style annotations are still valid, even when the new style is fully supported
|
289
|
+
__generic_types: list[type] = [
|
290
|
+
ts.GenericAlias,
|
291
|
+
type(tp.List[int]),
|
292
|
+
type(tp.Optional[int])
|
293
|
+
]
|
294
|
+
|
295
|
+
# UnionType was added to the types module in Python 3.10, we support 3.9 (Jan 2025)
|
296
|
+
if hasattr(ts, "UnionType"):
|
297
|
+
__generic_types.append(ts.UnionType)
|
285
298
|
|
286
299
|
__primitive_types: tp.Dict[type, callable] = {
|
287
300
|
bool: bool,
|
@@ -294,7 +307,7 @@ class ConfigParser(tp.Generic[_T]):
|
|
294
307
|
}
|
295
308
|
|
296
309
|
def __init__(self, config_class: _T.__class__, dev_mode_locations: tp.List[str] = None):
|
297
|
-
self._log =
|
310
|
+
self._log = _logging.logger_for_object(self)
|
298
311
|
self._config_class = config_class
|
299
312
|
self._dev_mode_locations = dev_mode_locations or []
|
300
313
|
self._errors = []
|
@@ -321,6 +334,23 @@ class ConfigParser(tp.Generic[_T]):
|
|
321
334
|
|
322
335
|
def _parse_value(self, location: str, raw_value: tp.Any, annotation: type):
|
323
336
|
|
337
|
+
if self._is_dev_mode_location(location):
|
338
|
+
|
339
|
+
if type(raw_value) in ConfigParser.__primitive_types:
|
340
|
+
return self._parse_primitive(location, raw_value, type(raw_value))
|
341
|
+
|
342
|
+
if isinstance(raw_value, list):
|
343
|
+
if len(raw_value) == 0:
|
344
|
+
return []
|
345
|
+
items = iter((self._child_location(location, i), x) for i, x in enumerate(raw_value))
|
346
|
+
return list(self._parse_value(loc, x, tp.Any) for loc, x in items)
|
347
|
+
|
348
|
+
if isinstance(raw_value, dict):
|
349
|
+
if len(raw_value) == 0:
|
350
|
+
return {}
|
351
|
+
items = iter((self._child_location(location, k), k, v) for k, v in raw_value.items())
|
352
|
+
return dict((k, self._parse_value(loc, v, tp.Any)) for loc, k, v in items)
|
353
|
+
|
324
354
|
if raw_value is None:
|
325
355
|
return None
|
326
356
|
|
@@ -339,24 +369,17 @@ class ConfigParser(tp.Generic[_T]):
|
|
339
369
|
return self._parse_enum(location, raw_value, annotation)
|
340
370
|
|
341
371
|
if _dc.is_dataclass(annotation):
|
372
|
+
return self._parse_simple_class(location, raw_value, annotation)
|
342
373
|
|
343
|
-
|
344
|
-
|
374
|
+
# Basic support for Pydantic, if it is installed
|
375
|
+
if _pyd and isinstance(annotation, type) and issubclass(annotation, _pyd.BaseModel):
|
376
|
+
return self._parse_simple_class(location, raw_value, annotation)
|
345
377
|
|
346
|
-
|
347
|
-
if type(raw_value) in ConfigParser.__primitive_types:
|
348
|
-
return self._parse_primitive(location, raw_value, type(raw_value))
|
349
|
-
if isinstance(raw_value, list):
|
350
|
-
if len(raw_value) == 0:
|
351
|
-
return []
|
352
|
-
list_type = type(raw_value[0])
|
353
|
-
return list(map(lambda x: self._parse_primitive(location, x, list_type), raw_value))
|
354
|
-
|
355
|
-
return self._error(location, f"Expected type {annotation.__name__}, got '{str(raw_value)}'")
|
356
|
-
|
357
|
-
if isinstance(annotation, self.__generic_metaclass):
|
378
|
+
if any(map(lambda _t: isinstance(annotation, _t), self.__generic_types)):
|
358
379
|
return self._parse_generic_class(location, raw_value, annotation) # noqa
|
359
380
|
|
381
|
+
return self._error(location, f"Cannot parse value of type {annotation.__name__}")
|
382
|
+
|
360
383
|
def _is_dev_mode_location(self, location):
|
361
384
|
|
362
385
|
return any(map(lambda pattern: re.match(pattern, location), self._dev_mode_locations))
|
@@ -416,14 +439,14 @@ class ConfigParser(tp.Generic[_T]):
|
|
416
439
|
def _parse_simple_class(self, location: str, raw_dict: tp.Any, metaclass: type) -> object:
|
417
440
|
|
418
441
|
if raw_dict is not None and not isinstance(raw_dict, dict):
|
419
|
-
|
442
|
+
return self._error(location, f"Expected type {metaclass.__name__}, got '{str(raw_dict)}'")
|
420
443
|
|
421
444
|
obj = metaclass.__new__(metaclass, object()) # noqa
|
422
445
|
|
423
446
|
init_signature = inspect.signature(metaclass.__init__)
|
424
447
|
init_types = tp.get_type_hints(metaclass.__init__)
|
425
448
|
init_params = iter(init_signature.parameters.items())
|
426
|
-
init_values: tp.
|
449
|
+
init_values: tp.Dict[str, tp.Any] = dict()
|
427
450
|
|
428
451
|
# Do not process 'self'
|
429
452
|
next(init_params)
|
@@ -437,20 +460,20 @@ class ConfigParser(tp.Generic[_T]):
|
|
437
460
|
message = f"Class {metaclass.__name__} does not support config decoding: " + \
|
438
461
|
f"Missing type information for init parameter '{param_name}'"
|
439
462
|
self._error(location, message)
|
440
|
-
init_values
|
463
|
+
init_values[param_name] = None
|
441
464
|
|
442
465
|
elif param_name in raw_dict and raw_dict[param_name] is not None:
|
443
466
|
param_value = self._parse_value(param_location, raw_dict[param_name], param_type)
|
444
|
-
init_values
|
467
|
+
init_values[param_name] = param_value
|
445
468
|
|
446
469
|
elif param.default != inspect._empty: # noqa
|
447
|
-
init_values
|
470
|
+
init_values[param_name] = param.default
|
448
471
|
|
449
472
|
else:
|
450
473
|
self._error(location, f"Missing required value '{param_name}'")
|
451
|
-
init_values
|
474
|
+
init_values[param_name] = None
|
452
475
|
|
453
|
-
binding = init_signature.bind(obj,
|
476
|
+
binding = init_signature.bind(obj, **init_values)
|
454
477
|
metaclass.__init__(*binding.args, **binding.kwargs)
|
455
478
|
|
456
479
|
# Now go back over the members and look for any that weren't declared in __init__
|
@@ -471,7 +494,7 @@ class ConfigParser(tp.Generic[_T]):
|
|
471
494
|
self._error(location, message)
|
472
495
|
|
473
496
|
# Generic members must be declared in __init__ since that is the only way to get the full annotation
|
474
|
-
if isinstance(type(default_value), self.
|
497
|
+
if any(map(lambda _t: isinstance(type(default_value), _t), self.__generic_types)):
|
475
498
|
message = f"Class {metaclass.__name__} does not support config decoding: " + \
|
476
499
|
f"Members with no default value must be declared in __init__: '{member_name}'"
|
477
500
|
self._error(location, message)
|
@@ -497,7 +520,7 @@ class ConfigParser(tp.Generic[_T]):
|
|
497
520
|
|
498
521
|
return obj
|
499
522
|
|
500
|
-
def _parse_generic_class(self, location: str, raw_value: tp.Any,
|
523
|
+
def _parse_generic_class(self, location: str, raw_value: tp.Any, metaclass: type):
|
501
524
|
|
502
525
|
origin = _util.get_origin(metaclass)
|
503
526
|
args = _util.get_args(metaclass)
|
@@ -510,7 +533,7 @@ class ConfigParser(tp.Generic[_T]):
|
|
510
533
|
return self._error(location, f"Expected a list, got {type(raw_value)}")
|
511
534
|
|
512
535
|
return [
|
513
|
-
self._parse_value(self._child_location(location,
|
536
|
+
self._parse_value(self._child_location(location, idx), item, list_type)
|
514
537
|
for (idx, item) in enumerate(raw_value)]
|
515
538
|
|
516
539
|
if origin == tp.Dict or origin == dict:
|
@@ -541,12 +564,14 @@ class ConfigParser(tp.Generic[_T]):
|
|
541
564
|
return None
|
542
565
|
|
543
566
|
@staticmethod
|
544
|
-
def _child_location(parent_location: str, item: str):
|
567
|
+
def _child_location(parent_location: str, item: tp.Union[str, int]):
|
545
568
|
|
546
569
|
if parent_location is None or parent_location == "":
|
547
570
|
return item
|
571
|
+
elif isinstance(item, int):
|
572
|
+
return f"{parent_location}[{item}]"
|
548
573
|
else:
|
549
|
-
return parent_location
|
574
|
+
return f"{parent_location}.{item}"
|
550
575
|
|
551
576
|
|
552
577
|
class ConfigQuoter:
|
@@ -14,6 +14,7 @@
|
|
14
14
|
# limitations under the License.
|
15
15
|
|
16
16
|
import abc
|
17
|
+
import copy
|
17
18
|
import dataclasses as dc
|
18
19
|
import typing as tp
|
19
20
|
import datetime as dt
|
@@ -36,17 +37,62 @@ except ModuleNotFoundError:
|
|
36
37
|
import tracdap.rt.api.experimental as _api
|
37
38
|
import tracdap.rt.metadata as _meta
|
38
39
|
import tracdap.rt.exceptions as _ex
|
39
|
-
import tracdap.rt._impl.
|
40
|
+
import tracdap.rt._impl.core.logging as _log
|
40
41
|
|
41
42
|
|
42
43
|
@dc.dataclass(frozen=True)
|
43
44
|
class DataSpec:
|
44
45
|
|
46
|
+
object_type: _meta.ObjectType
|
47
|
+
schema_type: _meta.SchemaType
|
45
48
|
data_item: str
|
49
|
+
|
46
50
|
data_def: _meta.DataDefinition
|
51
|
+
file_def: _meta.FileDefinition
|
47
52
|
storage_def: _meta.StorageDefinition
|
48
53
|
schema_def: tp.Optional[_meta.SchemaDefinition]
|
49
54
|
|
55
|
+
@staticmethod
|
56
|
+
def create_data_spec(
|
57
|
+
data_item: str,
|
58
|
+
data_def: _meta.DataDefinition,
|
59
|
+
storage_def: _meta.StorageDefinition,
|
60
|
+
schema_def: tp.Optional[_meta.SchemaDefinition] = None) -> "DataSpec":
|
61
|
+
|
62
|
+
if schema_def:
|
63
|
+
schema_type = schema_def.schemaType
|
64
|
+
elif data_def.schema:
|
65
|
+
schema_type = data_def.schema.schemaType
|
66
|
+
else:
|
67
|
+
schema_type = _meta.SchemaType.SCHEMA_TYPE_NOT_SET
|
68
|
+
|
69
|
+
return DataSpec(
|
70
|
+
_meta.ObjectType.DATA, schema_type, data_item,
|
71
|
+
data_def,
|
72
|
+
storage_def=storage_def,
|
73
|
+
schema_def=schema_def,
|
74
|
+
file_def=None)
|
75
|
+
|
76
|
+
@staticmethod
|
77
|
+
def create_file_spec(
|
78
|
+
data_item: str,
|
79
|
+
file_def: _meta.FileDefinition,
|
80
|
+
storage_def: _meta.StorageDefinition) -> "DataSpec":
|
81
|
+
|
82
|
+
return DataSpec(
|
83
|
+
_meta.ObjectType.FILE, _meta.SchemaType.SCHEMA_TYPE_NOT_SET, data_item,
|
84
|
+
file_def=file_def,
|
85
|
+
storage_def=storage_def,
|
86
|
+
data_def=None,
|
87
|
+
schema_def=None)
|
88
|
+
|
89
|
+
@staticmethod
|
90
|
+
def create_empty_spec(object_type: _meta.ObjectType, schema_type: _meta.SchemaType):
|
91
|
+
return DataSpec(object_type, schema_type, None, None, None, None, None)
|
92
|
+
|
93
|
+
def is_empty(self):
|
94
|
+
return self.data_item is None or len(self.data_item) == 0
|
95
|
+
|
50
96
|
|
51
97
|
@dc.dataclass(frozen=True)
|
52
98
|
class DataPartKey:
|
@@ -61,44 +107,104 @@ class DataPartKey:
|
|
61
107
|
@dc.dataclass(frozen=True)
|
62
108
|
class DataItem:
|
63
109
|
|
64
|
-
|
65
|
-
|
66
|
-
|
110
|
+
object_type: _meta.ObjectType
|
111
|
+
schema_type: _meta.SchemaType
|
112
|
+
|
113
|
+
content: tp.Any = None
|
114
|
+
content_type: tp.Type = None
|
115
|
+
content_func: tp.Callable[[], tp.Any] = None
|
67
116
|
|
68
|
-
|
69
|
-
|
117
|
+
trac_schema: _meta.SchemaDefinition = None
|
118
|
+
native_schema: tp.Any = None
|
119
|
+
|
120
|
+
# TODO: Remove legacy API and use content / native_schema instead
|
121
|
+
schema: pa.Schema = None
|
122
|
+
table: tp.Optional[pa.Table] = None
|
70
123
|
|
71
124
|
def is_empty(self) -> bool:
|
72
|
-
return self.
|
125
|
+
return self.content is None
|
126
|
+
|
127
|
+
@staticmethod
|
128
|
+
def create_empty(
|
129
|
+
object_type: _meta.ObjectType = _meta.ObjectType.DATA,
|
130
|
+
schema_type: _meta.SchemaType = _meta.SchemaType.TABLE) -> "DataItem":
|
131
|
+
|
132
|
+
if object_type == _meta.ObjectType.DATA and schema_type == _meta.SchemaType.TABLE:
|
133
|
+
return DataItem(_meta.ObjectType.DATA, _meta.SchemaType.TABLE, schema=pa.schema([]))
|
134
|
+
else:
|
135
|
+
return DataItem(object_type, schema_type)
|
73
136
|
|
74
137
|
@staticmethod
|
75
|
-
def
|
76
|
-
|
138
|
+
def for_table(table: pa.Table, schema: pa.Schema, trac_schema: _meta.SchemaDefinition) -> "DataItem":
|
139
|
+
|
140
|
+
return DataItem(
|
141
|
+
_meta.ObjectType.DATA, _meta.SchemaType.TABLE,
|
142
|
+
content=table, content_type=pa.Table,
|
143
|
+
trac_schema=trac_schema, native_schema=schema,
|
144
|
+
table=table, schema=schema)
|
145
|
+
|
146
|
+
@staticmethod
|
147
|
+
def for_struct(content: tp.Any):
|
148
|
+
|
149
|
+
return DataItem(
|
150
|
+
_meta.ObjectType.DATA, _meta.SchemaType.STRUCT,
|
151
|
+
content=content, content_type=type(content))
|
152
|
+
|
153
|
+
@staticmethod
|
154
|
+
def for_file_content(content: bytes):
|
155
|
+
|
156
|
+
return DataItem(
|
157
|
+
_meta.ObjectType.FILE, _meta.SchemaType.SCHEMA_TYPE_NOT_SET,
|
158
|
+
content=content, content_type=bytes)
|
77
159
|
|
78
160
|
|
79
161
|
@dc.dataclass(frozen=True)
|
80
162
|
class DataView:
|
81
163
|
|
82
|
-
|
83
|
-
arrow_schema: pa.Schema
|
164
|
+
object_type: _meta.ObjectType
|
84
165
|
|
85
|
-
|
166
|
+
trac_schema: _meta.SchemaDefinition = None
|
167
|
+
arrow_schema: pa.Schema = None
|
168
|
+
|
169
|
+
parts: tp.Dict[DataPartKey, tp.List[DataItem]] = None
|
170
|
+
file_item: tp.Optional[DataItem] = None
|
86
171
|
|
87
172
|
@staticmethod
|
88
|
-
def create_empty() -> "DataView":
|
89
|
-
|
173
|
+
def create_empty(object_type: _meta.ObjectType = _meta.ObjectType.DATA) -> "DataView":
|
174
|
+
if object_type == _meta.ObjectType.DATA:
|
175
|
+
return DataView(object_type, _meta.SchemaDefinition(), pa.schema([]), dict())
|
176
|
+
else:
|
177
|
+
return DataView(object_type)
|
90
178
|
|
91
179
|
@staticmethod
|
92
180
|
def for_trac_schema(trac_schema: _meta.SchemaDefinition):
|
93
|
-
|
94
|
-
|
181
|
+
if trac_schema.schemaType == _meta.SchemaType.TABLE:
|
182
|
+
arrow_schema = DataMapping.trac_to_arrow_schema(trac_schema)
|
183
|
+
return DataView(_meta.ObjectType.DATA, trac_schema, arrow_schema, dict())
|
184
|
+
else:
|
185
|
+
return DataView(_meta.ObjectType.DATA, trac_schema, parts = dict())
|
186
|
+
|
187
|
+
@staticmethod
|
188
|
+
def for_file_item(file_item: DataItem):
|
189
|
+
return DataView(file_item.object_type, file_item=file_item)
|
95
190
|
|
96
191
|
def with_trac_schema(self, trac_schema: _meta.SchemaDefinition):
|
97
192
|
arrow_schema = DataMapping.trac_to_arrow_schema(trac_schema)
|
98
|
-
return DataView(trac_schema, arrow_schema, self.parts)
|
193
|
+
return DataView(_meta.ObjectType.DATA, trac_schema, arrow_schema, self.parts)
|
194
|
+
|
195
|
+
def with_part(self, part_key: DataPartKey, part: DataItem):
|
196
|
+
new_parts = copy.copy(self.parts)
|
197
|
+
new_parts[part_key] = [part]
|
198
|
+
return DataView(self.object_type, self.trac_schema, self.arrow_schema, new_parts)
|
199
|
+
|
200
|
+
def with_file_item(self, file_item: DataItem):
|
201
|
+
return DataView(self.object_type, file_item=file_item)
|
99
202
|
|
100
203
|
def is_empty(self) -> bool:
|
101
|
-
|
204
|
+
if self.object_type == _meta.ObjectType.FILE:
|
205
|
+
return self.file_item is None
|
206
|
+
else:
|
207
|
+
return self.parts is None or not any(self.parts.values())
|
102
208
|
|
103
209
|
|
104
210
|
class _DataInternal:
|
@@ -115,7 +221,7 @@ class DataMapping:
|
|
115
221
|
:py:class:`TypeMapping <tracdap.rt.impl.type_system.MetadataCodec>`.
|
116
222
|
"""
|
117
223
|
|
118
|
-
__log =
|
224
|
+
__log = _log.logger_for_namespace(_DataInternal.__module__ + ".DataMapping")
|
119
225
|
|
120
226
|
# Matches TRAC_ARROW_TYPE_MAPPING in ArrowSchema, tracdap-lib-data
|
121
227
|
|
@@ -293,7 +399,7 @@ class DataMapping:
|
|
293
399
|
deltas = [*prior_deltas, item]
|
294
400
|
parts = {**view.parts, part: deltas}
|
295
401
|
|
296
|
-
return DataView(view.trac_schema, view.arrow_schema, parts)
|
402
|
+
return DataView(view.object_type, view.trac_schema, view.arrow_schema, parts=parts)
|
297
403
|
|
298
404
|
@classmethod
|
299
405
|
def view_to_arrow(cls, view: DataView, part: DataPartKey) -> pa.Table:
|
@@ -308,29 +414,27 @@ class DataMapping:
|
|
308
414
|
if not deltas:
|
309
415
|
raise _ex.ETracInternal(f"Data view for part [{part.opaque_key}] does not contain any items")
|
310
416
|
|
417
|
+
# For a single delta, use the existing Arrow content
|
311
418
|
if len(deltas) == 1:
|
312
419
|
return cls.item_to_arrow(deltas[0])
|
313
420
|
|
314
|
-
|
421
|
+
# For multiple deltas, construct a new table by assembling the record batches
|
422
|
+
# Atm no consideration is given to overwriting records based on business key
|
423
|
+
batches = iter(
|
315
424
|
batch
|
316
425
|
for delta in deltas
|
317
|
-
for batch in (
|
318
|
-
delta.batches
|
319
|
-
if delta.batches
|
320
|
-
else delta.table.to_batches())}
|
426
|
+
for batch in cls.item_to_arrow(delta).to_batches())
|
321
427
|
|
322
428
|
return pa.Table.from_batches(batches) # noqa
|
323
429
|
|
324
430
|
@classmethod
|
325
431
|
def item_to_arrow(cls, item: DataItem) -> pa.Table:
|
326
432
|
|
327
|
-
if item.
|
328
|
-
|
329
|
-
|
330
|
-
if item.batches is not None:
|
331
|
-
return pa.Table.from_batches(item.batches, item.schema) # noqa
|
433
|
+
if item.content_type != pa.Table:
|
434
|
+
detail = f"expected Arrow table, got [{item.content_type}]"
|
435
|
+
raise _ex.ETracInternal(f"Data item does not contain tabular data ({detail})")
|
332
436
|
|
333
|
-
|
437
|
+
return item.content
|
334
438
|
|
335
439
|
@classmethod
|
336
440
|
def arrow_to_pandas(
|
@@ -642,7 +746,7 @@ class DataConformance:
|
|
642
746
|
Check and/or apply conformance between datasets and schemas.
|
643
747
|
"""
|
644
748
|
|
645
|
-
__log =
|
749
|
+
__log = _log.logger_for_namespace(_DataInternal.__module__ + ".DataConformance")
|
646
750
|
|
647
751
|
__E_FIELD_MISSING = \
|
648
752
|
"Field [{field_name}] is missing from the data"
|