tracdap-runtime 0.7.1__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. tracdap/rt/_impl/core/__init__.py +14 -0
  2. tracdap/rt/_impl/{config_parser.py → core/config_parser.py} +36 -19
  3. tracdap/rt/_impl/{data.py → core/data.py} +136 -32
  4. tracdap/rt/_impl/core/logging.py +195 -0
  5. tracdap/rt/_impl/{models.py → core/models.py} +15 -12
  6. tracdap/rt/_impl/{repos.py → core/repos.py} +12 -3
  7. tracdap/rt/_impl/{schemas.py → core/schemas.py} +5 -5
  8. tracdap/rt/_impl/{shim.py → core/shim.py} +5 -4
  9. tracdap/rt/_impl/{storage.py → core/storage.py} +21 -10
  10. tracdap/rt/_impl/core/struct.py +547 -0
  11. tracdap/rt/_impl/{util.py → core/util.py} +1 -111
  12. tracdap/rt/_impl/{validation.py → core/validation.py} +99 -31
  13. tracdap/rt/_impl/exec/__init__.py +14 -0
  14. tracdap/rt/{_exec → _impl/exec}/actors.py +12 -14
  15. tracdap/rt/{_exec → _impl/exec}/context.py +228 -82
  16. tracdap/rt/{_exec → _impl/exec}/dev_mode.py +163 -81
  17. tracdap/rt/{_exec → _impl/exec}/engine.py +230 -105
  18. tracdap/rt/{_exec → _impl/exec}/functions.py +191 -100
  19. tracdap/rt/{_exec → _impl/exec}/graph.py +24 -36
  20. tracdap/rt/{_exec → _impl/exec}/graph_builder.py +252 -115
  21. tracdap/rt/_impl/grpc/codec.py +1 -1
  22. tracdap/rt/{_exec → _impl/grpc}/server.py +7 -6
  23. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +3 -3
  24. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2_grpc.py +1 -1
  25. tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
  26. tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +40 -0
  27. tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.pyi +62 -0
  28. tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
  29. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +32 -20
  30. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +48 -2
  31. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +4 -2
  32. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.pyi +8 -0
  33. tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
  34. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +65 -63
  35. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +16 -2
  36. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +28 -26
  37. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -4
  38. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +4 -4
  39. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.pyi +6 -0
  40. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +9 -7
  41. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.pyi +12 -4
  42. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +18 -5
  43. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +42 -2
  44. tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
  45. tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.py → storage_pb2.py} +4 -4
  46. tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
  47. tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
  48. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +1 -1
  49. tracdap/rt/{_exec → _impl}/runtime.py +32 -18
  50. tracdap/rt/_impl/static_api.py +65 -37
  51. tracdap/rt/_plugins/format_csv.py +1 -1
  52. tracdap/rt/_plugins/repo_git.py +56 -11
  53. tracdap/rt/_plugins/storage_sql.py +1 -1
  54. tracdap/rt/_version.py +1 -1
  55. tracdap/rt/api/__init__.py +5 -24
  56. tracdap/rt/api/constants.py +57 -0
  57. tracdap/rt/api/experimental.py +32 -0
  58. tracdap/rt/api/hook.py +26 -7
  59. tracdap/rt/api/model_api.py +16 -0
  60. tracdap/rt/api/static_api.py +265 -127
  61. tracdap/rt/config/__init__.py +11 -11
  62. tracdap/rt/config/common.py +2 -26
  63. tracdap/rt/config/dynamic.py +28 -0
  64. tracdap/rt/config/platform.py +17 -31
  65. tracdap/rt/config/runtime.py +2 -0
  66. tracdap/rt/ext/embed.py +2 -2
  67. tracdap/rt/ext/plugins.py +3 -3
  68. tracdap/rt/launch/launch.py +12 -14
  69. tracdap/rt/metadata/__init__.py +28 -18
  70. tracdap/rt/metadata/config.py +95 -0
  71. tracdap/rt/metadata/data.py +40 -0
  72. tracdap/rt/metadata/file.py +10 -0
  73. tracdap/rt/metadata/job.py +16 -0
  74. tracdap/rt/metadata/model.py +12 -2
  75. tracdap/rt/metadata/object.py +9 -1
  76. tracdap/rt/metadata/object_id.py +6 -0
  77. tracdap/rt/metadata/resource.py +41 -1
  78. {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info}/METADATA +23 -17
  79. tracdap_runtime-0.8.0.dist-info/RECORD +129 -0
  80. {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info}/WHEEL +1 -1
  81. tracdap/rt/_exec/__init__.py +0 -0
  82. tracdap_runtime-0.7.1.dist-info/RECORD +0 -121
  83. /tracdap/rt/_impl/{guard_rails.py → core/guard_rails.py} +0 -0
  84. /tracdap/rt/_impl/{type_system.py → core/type_system.py} +0 -0
  85. /tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.pyi → storage_pb2.pyi} +0 -0
  86. /tracdap/rt/metadata/{stoarge.py → storage.py} +0 -0
  87. {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info/licenses}/LICENSE +0 -0
  88. {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info}/top_level.txt +0 -0
tracdap/rt/_impl/core/__init__.py (new file)
@@ -0,0 +1,14 @@
+ # Licensed to the Fintech Open Source Foundation (FINOS) under one or
+ # more contributor license agreements. See the NOTICE file distributed
+ # with this work for additional information regarding copyright ownership.
+ # FINOS licenses this file to you under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with the
+ # License. You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
tracdap/rt/_impl/{config_parser.py → core/config_parser.py}
@@ -13,8 +13,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- from __future__ import annotations
-
  import dataclasses as _dc
  import decimal
  import enum
@@ -24,6 +22,7 @@ import json
  import os
  import pathlib
  import re
+ import types as ts
  import typing as tp
  import urllib.parse as _urlp
  import uuid
@@ -32,18 +31,24 @@ import tracdap.rt.config as _config
  import tracdap.rt.exceptions as _ex
  import tracdap.rt.ext.plugins as _plugins
  import tracdap.rt.ext.config as _config_ext
- import tracdap.rt._impl.util as _util
+ import tracdap.rt._impl.core.logging as _logging
+ import tracdap.rt._impl.core.util as _util

  import yaml
  import yaml.parser

+ try:
+     import pydantic as _pyd # noqa
+ except ModuleNotFoundError:
+     _pyd = None
+
  _T = tp.TypeVar('_T')


  class ConfigManager:

      @classmethod
-     def for_root_config(cls, root_config_file: tp.Union[str, pathlib.Path, None]) -> ConfigManager:
+     def for_root_config(cls, root_config_file: tp.Union[str, pathlib.Path, None]) -> "ConfigManager":

          if isinstance(root_config_file, pathlib.Path):
              root_file_path = cls._resolve_scheme(root_config_file)
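
A note on the optional import above: this is the usual guard pattern for an optional dependency. Probe once at import time, then gate every later use on the module object. A minimal standalone sketch of the same pattern (the helper name is illustrative, not part of the package):

    import typing as tp

    try:
        import pydantic as _pyd  # noqa
    except ModuleNotFoundError:
        _pyd = None  # pydantic is an optional extra

    def is_pydantic_model(annotation: tp.Any) -> bool:
        # Short-circuits before touching _pyd, so this is safe without pydantic
        return _pyd is not None \
            and isinstance(annotation, type) \
            and issubclass(annotation, _pyd.BaseModel)
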
@@ -69,7 +74,7 @@ class ConfigManager:
          return ConfigManager(working_dir_url, None)

      @classmethod
-     def for_root_dir(cls, root_config_dir: tp.Union[str, pathlib.Path]) -> ConfigManager:
+     def for_root_dir(cls, root_config_dir: tp.Union[str, pathlib.Path]) -> "ConfigManager":

          if isinstance(root_config_dir, pathlib.Path):
              root_dir_path = cls._resolve_scheme(root_config_dir)
@@ -103,7 +108,7 @@ class ConfigManager:
          return raw_url

      def __init__(self, root_dir_url: _urlp.ParseResult, root_file_url: tp.Optional[_urlp.ParseResult]):
-         self._log = _util.logger_for_object(self)
+         self._log = _logging.logger_for_object(self)
          self._root_dir_url = root_dir_url
          self._root_file_url = root_file_url

@@ -279,9 +284,17 @@

  class ConfigParser(tp.Generic[_T]):

-     # The metaclass for generic types varies between versions of the typing library
-     # To work around this, detect the correct metaclass by inspecting a generic type variable
-     __generic_metaclass = type(tp.List[object])
+     # Support both new and old styles for generic, union and optional types
+     # Old-style annotations are still valid, even when the new style is fully supported
+     __generic_types: list[type] = [
+         ts.GenericAlias,
+         type(tp.List[int]),
+         type(tp.Optional[int])
+     ]
+
+     # UnionType was added to the types module in Python 3.10, we support 3.9 (Jan 2025)
+     if hasattr(ts, "UnionType"):
+         __generic_types.append(ts.UnionType)

      __primitive_types: tp.Dict[type, callable] = {
          bool: bool,
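
The list-based check is needed because the runtime type of a generic annotation depends on how it is spelled and on the Python version: tp.List[int] is a typing._GenericAlias, list[int] is a types.GenericAlias, and int | None (3.10+) is a types.UnionType. A quick illustration, assuming Python 3.10 or later for the last case:

    import types as ts
    import typing as tp

    generic_types = [ts.GenericAlias, type(tp.List[int]), type(tp.Optional[int])]
    if hasattr(ts, "UnionType"):  # present from Python 3.10
        generic_types.append(ts.UnionType)

    # All four spellings are recognised, mirroring the check in _parse_value
    for annotation in (tp.List[int], list[int], tp.Optional[int], int | None):
        assert any(isinstance(annotation, t) for t in generic_types)
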
@@ -294,7 +307,7 @@ class ConfigParser(tp.Generic[_T]):
      }

      def __init__(self, config_class: _T.__class__, dev_mode_locations: tp.List[str] = None):
-         self._log = _util.logger_for_object(self)
+         self._log = _logging.logger_for_object(self)
          self._config_class = config_class
          self._dev_mode_locations = dev_mode_locations or []
          self._errors = []
@@ -358,7 +371,11 @@ class ConfigParser(tp.Generic[_T]):
          if _dc.is_dataclass(annotation):
              return self._parse_simple_class(location, raw_value, annotation)

-         if isinstance(annotation, self.__generic_metaclass):
+         # Basic support for Pydantic, if it is installed
+         if _pyd and isinstance(annotation, type) and issubclass(annotation, _pyd.BaseModel):
+             return self._parse_simple_class(location, raw_value, annotation)
+
+         if any(map(lambda _t: isinstance(annotation, _t), self.__generic_types)):
              return self._parse_generic_class(location, raw_value, annotation) # noqa

          return self._error(location, f"Cannot parse value of type {annotation.__name__}")
@@ -429,7 +446,7 @@ class ConfigParser(tp.Generic[_T]):
          init_signature = inspect.signature(metaclass.__init__)
          init_types = tp.get_type_hints(metaclass.__init__)
          init_params = iter(init_signature.parameters.items())
-         init_values: tp.List[tp.Any] = list()
+         init_values: tp.Dict[str, tp.Any] = dict()

          # Do not process 'self'
          next(init_params)
@@ -443,20 +460,20 @@ class ConfigParser(tp.Generic[_T]):
                  message = f"Class {metaclass.__name__} does not support config decoding: " + \
                            f"Missing type information for init parameter '{param_name}'"
                  self._error(location, message)
-                 init_values.append(None)
+                 init_values[param_name] = None

              elif param_name in raw_dict and raw_dict[param_name] is not None:
                  param_value = self._parse_value(param_location, raw_dict[param_name], param_type)
-                 init_values.append(param_value)
+                 init_values[param_name] = param_value

              elif param.default != inspect._empty: # noqa
-                 init_values.append(param.default)
+                 init_values[param_name] = param.default

              else:
                  self._error(location, f"Missing required value '{param_name}'")
-                 init_values.append(None)
+                 init_values[param_name] = None

-         binding = init_signature.bind(obj, *init_values)
+         binding = init_signature.bind(obj, **init_values)
          metaclass.__init__(*binding.args, **binding.kwargs)

          # Now go back over the members and look for any that weren't declared in __init__
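
Switching init_values from a positional list to a dict keyed by parameter name makes the binding independent of parameter order and works with keyword-only parameters; Signature.bind then raises a clear TypeError if a required parameter is still missing. The mechanism in isolation (the Example class is illustrative):

    import inspect

    class Example:
        def __init__(self, host: str, port: int = 8080):
            self.host, self.port = host, port

    init_signature = inspect.signature(Example.__init__)
    init_values = {"port": 9000, "host": "localhost"}  # order does not matter

    obj = Example.__new__(Example)
    binding = init_signature.bind(obj, **init_values)
    Example.__init__(*binding.args, **binding.kwargs)
    assert (obj.host, obj.port) == ("localhost", 9000)
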
@@ -477,7 +494,7 @@ class ConfigParser(tp.Generic[_T]):
                  self._error(location, message)

              # Generic members must be declared in __init__ since that is the only way to get the full annotation
-             if isinstance(type(default_value), self.__generic_metaclass):
+             if any(map(lambda _t: isinstance(type(default_value), _t), self.__generic_types)):
                  message = f"Class {metaclass.__name__} does not support config decoding: " + \
                            f"Members with no default value must be declared in __init__: '{member_name}'"
                  self._error(location, message)
@@ -503,7 +520,7 @@ class ConfigParser(tp.Generic[_T]):

          return obj

-     def _parse_generic_class(self, location: str, raw_value: tp.Any, metaclass: __generic_metaclass):
+     def _parse_generic_class(self, location: str, raw_value: tp.Any, metaclass: type):

          origin = _util.get_origin(metaclass)
          args = _util.get_args(metaclass)
tracdap/rt/_impl/{data.py → core/data.py}
@@ -14,6 +14,7 @@
  # limitations under the License.

  import abc
+ import copy
  import dataclasses as dc
  import typing as tp
  import datetime as dt
@@ -36,17 +37,62 @@ except ModuleNotFoundError:
  import tracdap.rt.api.experimental as _api
  import tracdap.rt.metadata as _meta
  import tracdap.rt.exceptions as _ex
- import tracdap.rt._impl.util as _util
+ import tracdap.rt._impl.core.logging as _log


  @dc.dataclass(frozen=True)
  class DataSpec:

+     object_type: _meta.ObjectType
+     schema_type: _meta.SchemaType
      data_item: str
+
      data_def: _meta.DataDefinition
+     file_def: _meta.FileDefinition
      storage_def: _meta.StorageDefinition
      schema_def: tp.Optional[_meta.SchemaDefinition]

+     @staticmethod
+     def create_data_spec(
+             data_item: str,
+             data_def: _meta.DataDefinition,
+             storage_def: _meta.StorageDefinition,
+             schema_def: tp.Optional[_meta.SchemaDefinition] = None) -> "DataSpec":
+
+         if schema_def:
+             schema_type = schema_def.schemaType
+         elif data_def.schema:
+             schema_type = data_def.schema.schemaType
+         else:
+             schema_type = _meta.SchemaType.SCHEMA_TYPE_NOT_SET
+
+         return DataSpec(
+             _meta.ObjectType.DATA, schema_type, data_item,
+             data_def,
+             storage_def=storage_def,
+             schema_def=schema_def,
+             file_def=None)
+
+     @staticmethod
+     def create_file_spec(
+             data_item: str,
+             file_def: _meta.FileDefinition,
+             storage_def: _meta.StorageDefinition) -> "DataSpec":
+
+         return DataSpec(
+             _meta.ObjectType.FILE, _meta.SchemaType.SCHEMA_TYPE_NOT_SET, data_item,
+             file_def=file_def,
+             storage_def=storage_def,
+             data_def=None,
+             schema_def=None)
+
+     @staticmethod
+     def create_empty_spec(object_type: _meta.ObjectType, schema_type: _meta.SchemaType):
+         return DataSpec(object_type, schema_type, None, None, None, None, None)
+
+     def is_empty(self):
+         return self.data_item is None or len(self.data_item) == 0
+

  @dc.dataclass(frozen=True)
  class DataPartKey:
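
The factory methods above are the intended construction path for the frozen dataclass, so callers never have to spell out every field. A hedged usage sketch; the data_item string and the bare metadata constructors are illustrative stand-ins for values the engine builds for real:

    import tracdap.rt.metadata as _meta

    data_def = _meta.DataDefinition()        # illustrative, normally built by the engine
    storage_def = _meta.StorageDefinition()  # illustrative

    spec = DataSpec.create_data_spec("data/table/xyz/part-root/snap-0", data_def, storage_def)
    assert spec.object_type == _meta.ObjectType.DATA and not spec.is_empty()

    empty = DataSpec.create_empty_spec(_meta.ObjectType.DATA, _meta.SchemaType.TABLE)
    assert empty.is_empty()
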
@@ -61,44 +107,104 @@ class DataPartKey:
  @dc.dataclass(frozen=True)
  class DataItem:

-     schema: pa.Schema
-     table: tp.Optional[pa.Table] = None
-     batches: tp.Optional[tp.List[pa.RecordBatch]] = None
+     object_type: _meta.ObjectType
+     schema_type: _meta.SchemaType
+
+     content: tp.Any = None
+     content_type: tp.Type = None
+     content_func: tp.Callable[[], tp.Any] = None

-     pandas: "tp.Optional[pandas.DataFrame]" = None
-     pyspark: tp.Any = None
+     trac_schema: _meta.SchemaDefinition = None
+     native_schema: tp.Any = None
+
+     # TODO: Remove legacy API and use content / native_schema instead
+     schema: pa.Schema = None
+     table: tp.Optional[pa.Table] = None

      def is_empty(self) -> bool:
-         return self.table is None and (self.batches is None or len(self.batches) == 0)
+         return self.content is None
+
+     @staticmethod
+     def create_empty(
+             object_type: _meta.ObjectType = _meta.ObjectType.DATA,
+             schema_type: _meta.SchemaType = _meta.SchemaType.TABLE) -> "DataItem":
+
+         if object_type == _meta.ObjectType.DATA and schema_type == _meta.SchemaType.TABLE:
+             return DataItem(_meta.ObjectType.DATA, _meta.SchemaType.TABLE, schema=pa.schema([]))
+         else:
+             return DataItem(object_type, schema_type)

      @staticmethod
-     def create_empty() -> "DataItem":
-         return DataItem(pa.schema([]))
+     def for_table(table: pa.Table, schema: pa.Schema, trac_schema: _meta.SchemaDefinition) -> "DataItem":
+
+         return DataItem(
+             _meta.ObjectType.DATA, _meta.SchemaType.TABLE,
+             content=table, content_type=pa.Table,
+             trac_schema=trac_schema, native_schema=schema,
+             table=table, schema=schema)
+
+     @staticmethod
+     def for_struct(content: tp.Any):
+
+         return DataItem(
+             _meta.ObjectType.DATA, _meta.SchemaType.STRUCT,
+             content=content, content_type=type(content))
+
+     @staticmethod
+     def for_file_content(content: bytes):
+
+         return DataItem(
+             _meta.ObjectType.FILE, _meta.SchemaType.SCHEMA_TYPE_NOT_SET,
+             content=content, content_type=bytes)


  @dc.dataclass(frozen=True)
  class DataView:

-     trac_schema: _meta.SchemaDefinition
-     arrow_schema: pa.Schema
+     object_type: _meta.ObjectType

-     parts: tp.Dict[DataPartKey, tp.List[DataItem]]
+     trac_schema: _meta.SchemaDefinition = None
+     arrow_schema: pa.Schema = None
+
+     parts: tp.Dict[DataPartKey, tp.List[DataItem]] = None
+     file_item: tp.Optional[DataItem] = None

      @staticmethod
-     def create_empty() -> "DataView":
-         return DataView(_meta.SchemaDefinition(), pa.schema([]), dict())
+     def create_empty(object_type: _meta.ObjectType = _meta.ObjectType.DATA) -> "DataView":
+         if object_type == _meta.ObjectType.DATA:
+             return DataView(object_type, _meta.SchemaDefinition(), pa.schema([]), dict())
+         else:
+             return DataView(object_type)

      @staticmethod
      def for_trac_schema(trac_schema: _meta.SchemaDefinition):
-         arrow_schema = DataMapping.trac_to_arrow_schema(trac_schema)
-         return DataView(trac_schema, arrow_schema, dict())
+         if trac_schema.schemaType == _meta.SchemaType.TABLE:
+             arrow_schema = DataMapping.trac_to_arrow_schema(trac_schema)
+             return DataView(_meta.ObjectType.DATA, trac_schema, arrow_schema, dict())
+         else:
+             return DataView(_meta.ObjectType.DATA, trac_schema, parts = dict())
+
+     @staticmethod
+     def for_file_item(file_item: DataItem):
+         return DataView(file_item.object_type, file_item=file_item)

      def with_trac_schema(self, trac_schema: _meta.SchemaDefinition):
          arrow_schema = DataMapping.trac_to_arrow_schema(trac_schema)
-         return DataView(trac_schema, arrow_schema, self.parts)
+         return DataView(_meta.ObjectType.DATA, trac_schema, arrow_schema, self.parts)
+
+     def with_part(self, part_key: DataPartKey, part: DataItem):
+         new_parts = copy.copy(self.parts)
+         new_parts[part_key] = [part]
+         return DataView(self.object_type, self.trac_schema, self.arrow_schema, new_parts)
+
+     def with_file_item(self, file_item: DataItem):
+         return DataView(self.object_type, file_item=file_item)

      def is_empty(self) -> bool:
-         return self.parts is None or not any(self.parts.values())
+         if self.object_type == _meta.ObjectType.FILE:
+             return self.file_item is None
+         else:
+             return self.parts is None or not any(self.parts.values())


  class _DataInternal:
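
With the new factories there is one construction path per object type, and the legacy schema / table fields are only populated by for_table. A small sketch of how the pieces compose, assuming pyarrow is available (the schema definition here is minimal and illustrative):

    import pyarrow as pa
    import tracdap.rt.metadata as _meta

    table = pa.table({"id": [1, 2, 3]})
    trac_schema = _meta.SchemaDefinition(schemaType=_meta.SchemaType.TABLE)

    item = DataItem.for_table(table, table.schema, trac_schema)
    view = DataView.create_empty().with_part(DataPartKey.for_root(), item)
    assert not item.is_empty() and not view.is_empty()

    file_view = DataView.for_file_item(DataItem.for_file_content(b"raw bytes"))
    assert file_view.object_type == _meta.ObjectType.FILE
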
@@ -115,7 +221,7 @@ class DataMapping:
      :py:class:`TypeMapping <tracdap.rt.impl.type_system.MetadataCodec>`.
      """

-     __log = _util.logger_for_namespace(_DataInternal.__module__ + ".DataMapping")
+     __log = _log.logger_for_namespace(_DataInternal.__module__ + ".DataMapping")

      # Matches TRAC_ARROW_TYPE_MAPPING in ArrowSchema, tracdap-lib-data

@@ -293,7 +399,7 @@ class DataMapping:
          deltas = [*prior_deltas, item]
          parts = {**view.parts, part: deltas}

-         return DataView(view.trac_schema, view.arrow_schema, parts)
+         return DataView(view.object_type, view.trac_schema, view.arrow_schema, parts=parts)

      @classmethod
      def view_to_arrow(cls, view: DataView, part: DataPartKey) -> pa.Table:
@@ -308,29 +414,27 @@ class DataMapping:
          if not deltas:
              raise _ex.ETracInternal(f"Data view for part [{part.opaque_key}] does not contain any items")

+         # For a single delta, use the existing Arrow content
          if len(deltas) == 1:
              return cls.item_to_arrow(deltas[0])

-         batches = {
+         # For multiple deltas, construct a new table by assembling the record batches
+         # Atm no consideration is given to overwriting records based on business key
+         batches = iter(
              batch
              for delta in deltas
-             for batch in (
-                 delta.batches
-                 if delta.batches
-                 else delta.table.to_batches())}
+             for batch in cls.item_to_arrow(delta).to_batches())

          return pa.Table.from_batches(batches) # noqa

      @classmethod
      def item_to_arrow(cls, item: DataItem) -> pa.Table:

-         if item.table is not None:
-             return item.table
-
-         if item.batches is not None:
-             return pa.Table.from_batches(item.batches, item.schema) # noqa
+         if item.content_type != pa.Table:
+             detail = f"expected Arrow table, got [{item.content_type}]"
+             raise _ex.ETracInternal(f"Data item does not contain tabular data ({detail})")

-         raise _ex.ETracInternal(f"Data item does not contain any usable data")
+         return item.content

      @classmethod
      def arrow_to_pandas(
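
Routing every delta through item_to_arrow normalises each one to a pa.Table before its record batches are concatenated, so all deltas must share a compatible schema. The pyarrow assembly step in isolation:

    import pyarrow as pa

    t1 = pa.table({"id": [1, 2]})
    t2 = pa.table({"id": [3]})

    # Same shape as the view_to_arrow code path: flatten deltas into batches
    batches = iter(batch for t in (t1, t2) for batch in t.to_batches())
    combined = pa.Table.from_batches(batches)
    assert combined.num_rows == 3
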
@@ -642,7 +746,7 @@ class DataConformance:
      Check and/or apply conformance between datasets and schemas.
      """

-     __log = _util.logger_for_namespace(_DataInternal.__module__ + ".DataConformance")
+     __log = _log.logger_for_namespace(_DataInternal.__module__ + ".DataConformance")

      __E_FIELD_MISSING = \
          "Field [{field_name}] is missing from the data"
tracdap/rt/_impl/core/logging.py (new file)
@@ -0,0 +1,195 @@
+ # Licensed to the Fintech Open Source Foundation (FINOS) under one or
+ # more contributor license agreements. See the NOTICE file distributed
+ # with this work for additional information regarding copyright ownership.
+ # FINOS licenses this file to you under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with the
+ # License. You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import io as _io
+ import sys as _sys
+ import typing as _tp
+
+ from logging import *
+
+
+ class PlainFormatter(Formatter):
+
+     FORMAT = f"%(asctime)s [%(threadName)s] %(levelname)s %(name)s" + \
+              f" - %(message)s"
+
+     def __init__(self):
+         super().__init__(self.FORMAT)
+
+
+ class ColorFormatter(Formatter):
+
+     _BLACK, _RED, _GREEN, _YELLOW, _BLUE, _MAGENTA, _CYAN, _WHITE, _DEFAULT_WHITE = range(9)
+     _DARK_BASE = 30
+     _LIGHT_BASE = 90
+
+     # DARK_BASE + WHITE = light grey
+     # DARK_BASE + DEFAULT_WHITE = regular console white
+     # LIGHT_BASE + WHITE = bright white (0xffffff), very bright!
+
+     def __init__(self, is_bright: bool):
+
+         super().__init__(self._base_fmt(is_bright))
+         self._level_colors = self._make_level_colors(is_bright)
+         self._default_color = self._make_default_color(is_bright)
+
+     def format(self, record):
+
+         level_name = record.levelname
+         level_color = self._level_colors.get(level_name)
+
+         if level_color:
+             record.levelname = level_color
+         else:
+             record.levelname = self._default_color + level_name
+
+         return Formatter.format(self, record)
+
+     def _base_fmt(self, is_bright: bool):
+
+         if is_bright:
+             base_color = self._make_ansi_code(self._DARK_BASE, self._DEFAULT_WHITE, is_bold=False)
+             message_color = self._make_ansi_code(self._LIGHT_BASE, self._CYAN, is_bold=False)
+         else:
+             base_color = self._make_ansi_code(self._DARK_BASE, self._WHITE, is_bold=False)
+             message_color = self._make_ansi_code(self._DARK_BASE, self._CYAN, is_bold=False)
+
+         return f"{base_color}%(asctime)s [%(threadName)s] %(levelname)s{base_color} %(name)s" + \
+                f" - {message_color}%(message)s"
+
+     def _make_level_colors(self, is_bright: bool):
+
+         base_code = self._LIGHT_BASE if is_bright else self._DARK_BASE
+
+         green = self._make_ansi_code(base_code, self._GREEN, is_bold=is_bright)
+         yellow = self._make_ansi_code(base_code, self._YELLOW, is_bold=is_bright)
+         red = self._make_ansi_code(base_code, self._RED, is_bold=is_bright)
+
+         level_colors = {
+             'CRITICAL': f"{red}CRITICAL",
+             'ERROR': f"{red}ERROR",
+             'WARNING': f"{yellow}WARNING",
+             'INFO': f"{green}INFO"
+         }
+
+         return level_colors
+
+     def _make_default_color(self, is_bright: bool):
+
+         base_code = self._LIGHT_BASE if is_bright else self._DARK_BASE
+         blue = self._make_ansi_code(base_code, self._BLUE, is_bold=is_bright)
+
+         return blue
+
+     @classmethod
+     def _make_ansi_code(cls, base_code: int, color_offset: int, is_bold: bool):
+         return f"\033[{1 if is_bold else 0};{base_code + color_offset}m"
+
+
+ def configure_logging(enable_debug=False):
+
+     root_logger = getLogger()
+     log_level = DEBUG if enable_debug else INFO
+
+     if not root_logger.hasHandlers():
+
+         console_formatter = ColorFormatter(is_bright=True)
+         console_handler = StreamHandler(_sys.stdout)
+         console_handler.setFormatter(console_formatter)
+         console_handler.setLevel(INFO)
+         root_logger.addHandler(console_handler)
+         root_logger.setLevel(log_level)
+
+         # Use is_bright=False for logs from the TRAC runtime, so model logs stand out
+
+         trac_logger = getLogger("tracdap.rt")
+
+         console_formatter = ColorFormatter(is_bright=False)
+         console_handler = StreamHandler(_sys.stdout)
+         console_handler.setFormatter(console_formatter)
+         console_handler.setLevel(log_level)
+         trac_logger.addHandler(console_handler)
+         trac_logger.propagate = False
+
+
+ def logger_for_object(obj: object) -> Logger:
+     return logger_for_class(obj.__class__)
+
+
+ def logger_for_class(clazz: type) -> Logger:
+     qualified_class_name = f"{clazz.__module__}.{clazz.__name__}"
+     return getLogger(qualified_class_name)
+
+
+ def logger_for_namespace(namespace: str) -> Logger:
+     return getLogger(namespace)
+
+
+ class JobLogger(Logger):
+
+     def __init__(self, sys_log: Logger, *handlers: Handler):
+
+         super().__init__(sys_log.name, sys_log.level)
+         self._sys_log = sys_log._log
+         self._job_log = super()._log
+
+         for handler in handlers:
+             self.addHandler(handler)
+
+     def _log(self, level, msg, args, exc_info=None, extra=None, stack_info=False, stacklevel=1):
+
+         self._sys_log(level, msg, args, exc_info, extra, stack_info, stacklevel)
+         self._job_log(level, msg, args, exc_info, extra, stack_info, stacklevel)
+
+
+ class LogProvider:
+
+     def logger_for_object(self, obj: object) -> Logger:
+         return logger_for_object(obj)
+
+     def logger_for_class(self, clazz: type) -> Logger:
+         return logger_for_class(clazz)
+
+     def logger_for_namespace(self, namespace: str) -> Logger:
+         return logger_for_namespace(namespace)
+
+
+ class JobLogProvider(LogProvider):
+
+     def __init__(self, *handlers: Handler):
+         self.__handlers = handlers
+
+     def logger_for_object(self, obj: object) -> Logger:
+         base_logger = logger_for_object(obj)
+         return JobLogger(base_logger, *self.__handlers)
+
+     def logger_for_class(self, clazz: type) -> Logger:
+         base_logger = logger_for_class(clazz)
+         return JobLogger(base_logger, *self.__handlers)
+
+     def logger_for_namespace(self, namespace: str) -> Logger:
+         base_logger = logger_for_namespace(namespace)
+         return JobLogger(base_logger, *self.__handlers)
+
+
+ def job_log_provider(target: _tp.BinaryIO) -> JobLogProvider:
+
+     stream = _io.TextIOWrapper(target, newline="\r\n")
+     formatter = PlainFormatter()
+
+     handler = StreamHandler(stream)
+     handler.setFormatter(formatter)
+
+     return JobLogProvider(handler)
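
Because the module re-exports the standard logging API (from logging import *), call sites can use it as a drop-in replacement for the logging module, and JobLogProvider mirrors every record to both the normal handlers and a per-job stream. A hedged sketch of how the pieces fit together (this is internal API, and the namespace string is illustrative), writing the job log to an in-memory buffer:

    import io
    from tracdap.rt._impl.core.logging import configure_logging, job_log_provider

    configure_logging()  # installs the colour console handlers, once per process

    buffer = io.BytesIO()  # stands in for a job log file opened for write
    provider = job_log_provider(buffer)

    log = provider.logger_for_namespace("tracdap.rt.example")
    log.info("recorded on the console and in the job log buffer")
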
tracdap/rt/_impl/{models.py → core/models.py}
@@ -25,11 +25,12 @@ import tracdap.rt.metadata as _meta
  import tracdap.rt.config as _cfg
  import tracdap.rt.exceptions as _ex

- import tracdap.rt._impl.type_system as _types
- import tracdap.rt._impl.repos as _repos
- import tracdap.rt._impl.shim as _shim
- import tracdap.rt._impl.util as _util
- import tracdap.rt._impl.validation as _val
+ import tracdap.rt._impl.core.logging as _logging
+ import tracdap.rt._impl.core.repos as _repos
+ import tracdap.rt._impl.core.shim as _shim
+ import tracdap.rt._impl.core.type_system as _types
+ import tracdap.rt._impl.core.util as _util
+ import tracdap.rt._impl.core.validation as _val


  class ModelLoader:
@@ -43,7 +44,7 @@ class ModelLoader:

      def __init__(self, sys_config: _cfg.RuntimeConfig, scratch_dir: pathlib.Path):

-         self.__log = _util.logger_for_object(self)
+         self.__log = _logging.logger_for_object(self)

          self.__scratch_dir = scratch_dir.joinpath("models")
          self.__repos = _repos.RepositoryManager(sys_config)
@@ -226,13 +227,15 @@
              self.__log.info(f"Parameter [{name}] - {param.paramType.basicType.name}")
              param.paramProps = self._encoded_props(param.paramProps, "parameter", name)

-         for name, schema in model_def.inputs.items():
-             self.__log.info(f"Input [{name}] - {schema.schema.schemaType.name}")
-             schema.inputProps = self._encoded_props(schema.inputProps, "input", name)
+         for name, input_def in model_def.inputs.items():
+             input_type = input_def.schema.schemaType.name if input_def.objectType == _meta.ObjectType.DATA else input_def.objectType.name
+             self.__log.info(f"Input [{name}] - {input_type}")
+             input_def.inputProps = self._encoded_props(input_def.inputProps, "input", name)

-         for name, schema in model_def.outputs.items():
-             self.__log.info(f"Output [{name}] - {schema.schema.schemaType.name}")
-             schema.outputProps = self._encoded_props(schema.outputProps, "input", name)
+         for name, output_def in model_def.outputs.items():
+             output_type = output_def.schema.schemaType.name if output_def.objectType == _meta.ObjectType.DATA else output_def.objectType.name
+             self.__log.info(f"Output [{name}] - {output_type}")
+             output_def.outputProps = self._encoded_props(output_def.outputProps, "input", name)

          return model_def
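
The input and output descriptions now depend on the object type: DATA sockets report their schema type (e.g. TABLE or STRUCT), anything else reports the object type itself (e.g. FILE). Written as a plain function, the rule embedded in the two conditional expressions reads (the socket parameter name is illustrative):

    import tracdap.rt.metadata as _meta

    def describe_socket(socket) -> str:
        # DATA inputs / outputs are described by schema type, others by object type
        if socket.objectType == _meta.ObjectType.DATA:
            return socket.schema.schemaType.name
        return socket.objectType.name
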