tracdap-runtime 0.8.0b3__py3-none-any.whl → 0.8.0rc1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Files changed (40)
  1. tracdap/rt/_impl/core/config_parser.py +55 -37
  2. tracdap/rt/_impl/core/data.py +63 -32
  3. tracdap/rt/_impl/core/storage.py +4 -1
  4. tracdap/rt/_impl/core/struct.py +547 -0
  5. tracdap/rt/_impl/core/type_system.py +73 -33
  6. tracdap/rt/_impl/core/validation.py +56 -15
  7. tracdap/rt/_impl/exec/context.py +64 -10
  8. tracdap/rt/_impl/exec/dev_mode.py +25 -14
  9. tracdap/rt/_impl/exec/functions.py +79 -29
  10. tracdap/rt/_impl/grpc/codec.py +1 -1
  11. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +2 -2
  12. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2_grpc.py +1 -1
  13. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +31 -19
  14. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +48 -2
  15. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +2 -2
  16. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.pyi +3 -3
  17. tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.py → storage_pb2.py} +3 -3
  18. tracdap/rt/_impl/static_api.py +9 -1
  19. tracdap/rt/_plugins/storage_sql.py +12 -5
  20. tracdap/rt/_version.py +1 -1
  21. tracdap/rt/api/__init__.py +1 -23
  22. tracdap/rt/api/constants.py +57 -0
  23. tracdap/rt/api/experimental.py +32 -0
  24. tracdap/rt/api/hook.py +11 -0
  25. tracdap/rt/api/static_api.py +54 -2
  26. tracdap/rt/config/__init__.py +1 -4
  27. tracdap/rt/config/common.py +0 -34
  28. tracdap/rt/config/platform.py +6 -26
  29. tracdap/rt/metadata/__init__.py +31 -29
  30. tracdap/rt/metadata/data.py +40 -0
  31. tracdap/rt/metadata/file.py +2 -0
  32. tracdap/rt/metadata/object.py +1 -1
  33. {tracdap_runtime-0.8.0b3.dist-info → tracdap_runtime-0.8.0rc1.dist-info}/METADATA +17 -14
  34. {tracdap_runtime-0.8.0b3.dist-info → tracdap_runtime-0.8.0rc1.dist-info}/RECORD +39 -38
  35. {tracdap_runtime-0.8.0b3.dist-info → tracdap_runtime-0.8.0rc1.dist-info}/WHEEL +1 -1
  36. tracdap/rt/api/file_types.py +0 -29
  37. /tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.pyi → storage_pb2.pyi} +0 -0
  38. /tracdap/rt/metadata/{stoarge.py → storage.py} +0 -0
  39. {tracdap_runtime-0.8.0b3.dist-info → tracdap_runtime-0.8.0rc1.dist-info}/LICENSE +0 -0
  40. {tracdap_runtime-0.8.0b3.dist-info → tracdap_runtime-0.8.0rc1.dist-info}/top_level.txt +0 -0
tracdap/rt/_impl/core/config_parser.py

@@ -12,14 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 
 import dataclasses as _dc
 import decimal
@@ -30,6 +22,7 @@ import json
 import os
 import pathlib
 import re
+import types as ts
 import typing as tp
 import urllib.parse as _urlp
 import uuid
@@ -44,6 +37,11 @@ import tracdap.rt._impl.core.util as _util
 import yaml
 import yaml.parser
 
+try:
+    import pydantic as _pyd  # noqa
+except ModuleNotFoundError:
+    _pyd = None
+
 _T = tp.TypeVar('_T')
 
 
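The guarded import above is a common optional-dependency pattern: pydantic is bound to _pyd when it is installed and to None otherwise, so later checks can short-circuit safely. A minimal standalone sketch of how such a guard is typically used (is_pydantic_model is an illustrative helper, not part of the module):

    import typing as tp

    try:
        import pydantic as _pyd  # noqa
    except ModuleNotFoundError:
        _pyd = None  # parsing still works, pydantic models are simply not recognised

    def is_pydantic_model(annotation: tp.Any) -> bool:
        # The short-circuit keeps issubclass from running when pydantic is absent
        return _pyd is not None and isinstance(annotation, type) \
            and issubclass(annotation, _pyd.BaseModel)

This mirrors the dispatch check added to _parse_value further down in this diff.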
@@ -286,9 +284,17 @@ class ConfigManager:
 
 class ConfigParser(tp.Generic[_T]):
 
-    # The metaclass for generic types varies between versions of the typing library
-    # To work around this, detect the correct metaclass by inspecting a generic type variable
-    __generic_metaclass = type(tp.List[object])
+    # Support both new and old styles for generic, union and optional types
+    # Old-style annotations are still valid, even when the new style is fully supported
+    __generic_types: list[type] = [
+        ts.GenericAlias,
+        type(tp.List[int]),
+        type(tp.Optional[int])
+    ]
+
+    # UnionType was added to the types module in Python 3.10, we support 3.9 (Jan 2025)
+    if hasattr(ts, "UnionType"):
+        __generic_types.append(ts.UnionType)
 
     __primitive_types: tp.Dict[type, callable] = {
         bool: bool,
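Replacing the single detected metaclass with a list of alias types covers both annotation styles. A quick standalone check of what each entry matches (a sketch, assuming Python 3.9+, with the union case guarded for 3.10+):

    import types as ts
    import typing as tp

    generic_types = [ts.GenericAlias, type(tp.List[int]), type(tp.Optional[int])]
    if hasattr(ts, "UnionType"):
        generic_types.append(ts.UnionType)

    def is_generic(annotation) -> bool:
        return any(isinstance(annotation, t) for t in generic_types)

    print(is_generic(list[int]))         # True - new-style builtin generic (types.GenericAlias)
    print(is_generic(tp.List[int]))      # True - old-style typing alias
    print(is_generic(tp.Optional[int]))  # True - typing Union/Optional alias
    print(is_generic(int))               # False - a plain type is not an alias
    if hasattr(ts, "UnionType"):
        print(is_generic(int | str))     # True - new-style union, types.UnionType (3.10+)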
@@ -328,6 +334,23 @@ class ConfigParser(tp.Generic[_T]):
 
     def _parse_value(self, location: str, raw_value: tp.Any, annotation: type):
 
+        if self._is_dev_mode_location(location):
+
+            if type(raw_value) in ConfigParser.__primitive_types:
+                return self._parse_primitive(location, raw_value, type(raw_value))
+
+            if isinstance(raw_value, list):
+                if len(raw_value) == 0:
+                    return []
+                items = iter((self._child_location(location, i), x) for i, x in enumerate(raw_value))
+                return list(self._parse_value(loc, x, tp.Any) for loc, x in items)
+
+            if isinstance(raw_value, dict):
+                if len(raw_value) == 0:
+                    return {}
+                items = iter((self._child_location(location, k), k, v) for k, v in raw_value.items())
+                return dict((k, self._parse_value(loc, v, tp.Any)) for loc, k, v in items)
+
         if raw_value is None:
             return None
 
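In dev-mode locations the parser now descends into lists and dicts with tp.Any, instead of only accepting flat lists of primitives as before. The shape of the recursion, reduced to a standalone sketch (parse_any is illustrative; the real code routes back through _parse_value and _child_location):

    def parse_any(location: str, raw):
        # Lists recurse with indexed child locations, dicts with dotted keys,
        # and any other value passes through unchanged
        if isinstance(raw, list):
            return [parse_any(f"{location}[{i}]", x) for i, x in enumerate(raw)]
        if isinstance(raw, dict):
            return {k: parse_any(f"{location}.{k}", v) for k, v in raw.items()}
        return raw

    print(parse_any("job.parameters", {"rates": [1.5, 2.5], "label": "dev"}))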
@@ -346,24 +369,17 @@ class ConfigParser(tp.Generic[_T]):
             return self._parse_enum(location, raw_value, annotation)
 
         if _dc.is_dataclass(annotation):
+            return self._parse_simple_class(location, raw_value, annotation)
 
-            if isinstance(raw_value, tp.Dict):
-                return self._parse_simple_class(location, raw_value, annotation)
-
-            if self._is_dev_mode_location(location):
-                if type(raw_value) in ConfigParser.__primitive_types:
-                    return self._parse_primitive(location, raw_value, type(raw_value))
-                if isinstance(raw_value, list):
-                    if len(raw_value) == 0:
-                        return []
-                    list_type = type(raw_value[0])
-                    return list(map(lambda x: self._parse_primitive(location, x, list_type), raw_value))
+        # Basic support for Pydantic, if it is installed
+        if _pyd and isinstance(annotation, type) and issubclass(annotation, _pyd.BaseModel):
+            return self._parse_simple_class(location, raw_value, annotation)
 
-            return self._error(location, f"Expected type {annotation.__name__}, got '{str(raw_value)}'")
-
-        if isinstance(annotation, self.__generic_metaclass):
+        if any(map(lambda _t: isinstance(annotation, _t), self.__generic_types)):
             return self._parse_generic_class(location, raw_value, annotation) # noqa
 
+        return self._error(location, f"Cannot parse value of type {annotation.__name__}")
+
     def _is_dev_mode_location(self, location):
 
         return any(map(lambda pattern: re.match(pattern, location), self._dev_mode_locations))
@@ -423,14 +439,14 @@ class ConfigParser(tp.Generic[_T]):
     def _parse_simple_class(self, location: str, raw_dict: tp.Any, metaclass: type) -> object:
 
         if raw_dict is not None and not isinstance(raw_dict, dict):
-            pass
+            return self._error(location, f"Expected type {metaclass.__name__}, got '{str(raw_dict)}'")
 
         obj = metaclass.__new__(metaclass, object()) # noqa
 
         init_signature = inspect.signature(metaclass.__init__)
         init_types = tp.get_type_hints(metaclass.__init__)
         init_params = iter(init_signature.parameters.items())
-        init_values: tp.List[tp.Any] = list()
+        init_values: tp.Dict[str, tp.Any] = dict()
 
         # Do not process 'self'
         next(init_params)
@@ -444,20 +460,20 @@ class ConfigParser(tp.Generic[_T]):
                 message = f"Class {metaclass.__name__} does not support config decoding: " + \
                           f"Missing type information for init parameter '{param_name}'"
                 self._error(location, message)
-                init_values.append(None)
+                init_values[param_name] = None
 
             elif param_name in raw_dict and raw_dict[param_name] is not None:
                 param_value = self._parse_value(param_location, raw_dict[param_name], param_type)
-                init_values.append(param_value)
+                init_values[param_name] = param_value
 
             elif param.default != inspect._empty: # noqa
-                init_values.append(param.default)
+                init_values[param_name] = param.default
 
             else:
                 self._error(location, f"Missing required value '{param_name}'")
-                init_values.append(None)
+                init_values[param_name] = None
 
-        binding = init_signature.bind(obj, *init_values)
+        binding = init_signature.bind(obj, **init_values)
         metaclass.__init__(*binding.args, **binding.kwargs)
 
         # Now go back over the members and look for any that weren't declared in __init__
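Switching init_values from a list to a dict binds every collected value to its parameter by name, so iteration order can no longer misalign arguments with parameters. A reduced sketch of the bind-then-init pattern (Example is a hypothetical stand-in class):

    import inspect

    class Example:
        def __init__(self, host, port=8080):
            self.host, self.port = host, port

    obj = Example.__new__(Example)
    init_signature = inspect.signature(Example.__init__)

    # Values are keyed by parameter name, so their order is irrelevant
    init_values = {"port": 9000, "host": "localhost"}

    binding = init_signature.bind(obj, **init_values)
    Example.__init__(*binding.args, **binding.kwargs)
    print(obj.host, obj.port)  # localhost 9000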
@@ -478,7 +494,7 @@ class ConfigParser(tp.Generic[_T]):
                 self._error(location, message)
 
             # Generic members must be declared in __init__ since that is the only way to get the full annotation
-            if isinstance(type(default_value), self.__generic_metaclass):
+            if any(map(lambda _t: isinstance(type(default_value), _t), self.__generic_types)):
                 message = f"Class {metaclass.__name__} does not support config decoding: " + \
                           f"Members with no default value must be declared in __init__: '{member_name}'"
                 self._error(location, message)
@@ -504,7 +520,7 @@ class ConfigParser(tp.Generic[_T]):
 
         return obj
 
-    def _parse_generic_class(self, location: str, raw_value: tp.Any, metaclass: __generic_metaclass):
+    def _parse_generic_class(self, location: str, raw_value: tp.Any, metaclass: type):
 
         origin = _util.get_origin(metaclass)
         args = _util.get_args(metaclass)
@@ -517,7 +533,7 @@ class ConfigParser(tp.Generic[_T]):
                 return self._error(location, f"Expected a list, got {type(raw_value)}")
 
             return [
-                self._parse_value(self._child_location(location, str(idx)), item, list_type)
+                self._parse_value(self._child_location(location, idx), item, list_type)
                 for (idx, item) in enumerate(raw_value)]
 
         if origin == tp.Dict or origin == dict:
@@ -548,12 +564,14 @@ class ConfigParser(tp.Generic[_T]):
         return None
 
     @staticmethod
-    def _child_location(parent_location: str, item: str):
+    def _child_location(parent_location: str, item: tp.Union[str, int]):
 
         if parent_location is None or parent_location == "":
             return item
+        elif isinstance(item, int):
+            return f"{parent_location}[{item}]"
         else:
-            return parent_location + "." + item
+            return f"{parent_location}.{item}"
 
 
 class ConfigQuoter:
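With integer items supported, locations for list elements render as index expressions rather than dotted segments, so error messages can point at the exact element. For example (child_location mirrors the static method above):

    def child_location(parent_location, item):
        if parent_location is None or parent_location == "":
            return item
        elif isinstance(item, int):
            return f"{parent_location}[{item}]"
        else:
            return f"{parent_location}.{item}"

    loc = child_location("storage.buckets", 0)  # 'storage.buckets[0]'
    loc = child_location(loc, "protocol")       # 'storage.buckets[0].protocol'
    print(loc)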
tracdap/rt/_impl/core/data.py

@@ -44,6 +44,7 @@ import tracdap.rt._impl.core.logging as _log
 class DataSpec:
 
     object_type: _meta.ObjectType
+    schema_type: _meta.SchemaType
     data_item: str
 
     data_def: _meta.DataDefinition
@@ -58,8 +59,15 @@ class DataSpec:
             storage_def: _meta.StorageDefinition,
             schema_def: tp.Optional[_meta.SchemaDefinition] = None) -> "DataSpec":
 
+        if schema_def:
+            schema_type = schema_def.schemaType
+        elif data_def.schema:
+            schema_type = data_def.schema.schemaType
+        else:
+            schema_type = _meta.SchemaType.SCHEMA_TYPE_NOT_SET
+
         return DataSpec(
-            _meta.ObjectType.DATA, data_item,
+            _meta.ObjectType.DATA, schema_type, data_item,
             data_def,
             storage_def=storage_def,
             schema_def=schema_def,
@@ -72,15 +80,15 @@ class DataSpec:
             storage_def: _meta.StorageDefinition) -> "DataSpec":
 
         return DataSpec(
-            _meta.ObjectType.FILE, data_item,
+            _meta.ObjectType.FILE, _meta.SchemaType.SCHEMA_TYPE_NOT_SET, data_item,
             file_def=file_def,
             storage_def=storage_def,
             data_def=None,
             schema_def=None)
 
     @staticmethod
-    def create_empty_spec(object_type: _meta.ObjectType):
-        return DataSpec(object_type, None, None, None, None, None)
+    def create_empty_spec(object_type: _meta.ObjectType, schema_type: _meta.SchemaType):
+        return DataSpec(object_type, schema_type, None, None, None, None, None)
 
     def is_empty(self):
         return self.data_item is None or len(self.data_item) == 0
@@ -100,32 +108,54 @@ class DataPartKey:
 class DataItem:
 
     object_type: _meta.ObjectType
+    schema_type: _meta.SchemaType
+
+    content: tp.Any = None
+    content_type: tp.Type = None
+    content_func: tp.Callable[[], tp.Any] = None
+
+    trac_schema: _meta.SchemaDefinition = None
+    native_schema: tp.Any = None
 
+    # TODO: Remove legacy API and use content / native_schema instead
     schema: pa.Schema = None
     table: tp.Optional[pa.Table] = None
-    batches: tp.Optional[tp.List[pa.RecordBatch]] = None
 
-    pandas: "tp.Optional[pandas.DataFrame]" = None
-    pyspark: tp.Any = None
+    def is_empty(self) -> bool:
+        return self.content is None
 
-    raw_bytes: bytes = None
+    @staticmethod
+    def create_empty(
+            object_type: _meta.ObjectType = _meta.ObjectType.DATA,
+            schema_type: _meta.SchemaType = _meta.SchemaType.TABLE) -> "DataItem":
 
-    def is_empty(self) -> bool:
-        if self.object_type == _meta.ObjectType.FILE:
-            return self.raw_bytes is None or len(self.raw_bytes) == 0
+        if object_type == _meta.ObjectType.DATA and schema_type == _meta.SchemaType.TABLE:
+            return DataItem(_meta.ObjectType.DATA, _meta.SchemaType.TABLE, schema=pa.schema([]))
         else:
-            return self.table is None and (self.batches is None or len(self.batches) == 0)
+            return DataItem(object_type, schema_type)
 
     @staticmethod
-    def create_empty(object_type: _meta.ObjectType = _meta.ObjectType.DATA) -> "DataItem":
-        if object_type == _meta.ObjectType.DATA:
-            return DataItem(_meta.ObjectType.DATA, pa.schema([]))
-        else:
-            return DataItem(object_type)
+    def for_table(table: pa.Table, schema: pa.Schema, trac_schema: _meta.SchemaDefinition) -> "DataItem":
+
+        return DataItem(
+            _meta.ObjectType.DATA, _meta.SchemaType.TABLE,
+            content=table, content_type=pa.Table,
+            trac_schema=trac_schema, native_schema=schema,
+            table=table, schema=schema)
+
+    @staticmethod
+    def for_struct(content: tp.Any):
+
+        return DataItem(
+            _meta.ObjectType.DATA, _meta.SchemaType.STRUCT,
+            content=content, content_type=type(content))
 
     @staticmethod
-    def for_file_content(raw_bytes: bytes):
-        return DataItem(_meta.ObjectType.FILE, raw_bytes=raw_bytes)
+    def for_file_content(content: bytes):
+
+        return DataItem(
+            _meta.ObjectType.FILE, _meta.SchemaType.SCHEMA_TYPE_NOT_SET,
+            content=content, content_type=bytes)
 
 
 @dc.dataclass(frozen=True)
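The reworked DataItem replaces the per-framework fields (batches, pandas, pyspark, raw_bytes) with a single content slot plus content_type, keeping table and schema only for the legacy API. A self-contained mirror of the new shape, using a stand-in dataclass rather than the TRAC class itself:

    import dataclasses as dc
    import typing as tp

    import pyarrow as pa

    @dc.dataclass
    class MiniDataItem:
        # One generic payload slot replaces the per-framework fields
        content: tp.Any = None
        content_type: tp.Optional[type] = None

        def is_empty(self) -> bool:
            return self.content is None

    table_item = MiniDataItem(content=pa.table({"id": [1, 2]}), content_type=pa.Table)
    file_item = MiniDataItem(content=b"raw bytes", content_type=bytes)
    print(table_item.is_empty(), file_item.is_empty())  # False False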
@@ -148,8 +178,11 @@ class DataView:
 
     @staticmethod
     def for_trac_schema(trac_schema: _meta.SchemaDefinition):
-        arrow_schema = DataMapping.trac_to_arrow_schema(trac_schema)
-        return DataView(_meta.ObjectType.DATA, trac_schema, arrow_schema, dict())
+        if trac_schema.schemaType == _meta.SchemaType.TABLE:
+            arrow_schema = DataMapping.trac_to_arrow_schema(trac_schema)
+            return DataView(_meta.ObjectType.DATA, trac_schema, arrow_schema, dict())
+        else:
+            return DataView(_meta.ObjectType.DATA, trac_schema, parts = dict())
 
     @staticmethod
     def for_file_item(file_item: DataItem):
@@ -381,29 +414,27 @@ class DataMapping:
         if not deltas:
             raise _ex.ETracInternal(f"Data view for part [{part.opaque_key}] does not contain any items")
 
+        # For a single delta, use the existing Arrow content
         if len(deltas) == 1:
             return cls.item_to_arrow(deltas[0])
 
-        batches = {
+        # For multiple deltas, construct a new table by assembling the record batches
+        # Atm no consideration is given to overwriting records based on business key
+        batches = iter(
             batch
             for delta in deltas
-            for batch in (
-                delta.batches
-                if delta.batches
-                else delta.table.to_batches())}
+            for batch in cls.item_to_arrow(delta).to_batches())
 
         return pa.Table.from_batches(batches) # noqa
 
     @classmethod
     def item_to_arrow(cls, item: DataItem) -> pa.Table:
 
-        if item.table is not None:
-            return item.table
-
-        if item.batches is not None:
-            return pa.Table.from_batches(item.batches, item.schema) # noqa
+        if item.content_type != pa.Table:
+            detail = f"expected Arrow table, got [{item.content_type}]"
+            raise _ex.ETracInternal(f"Data item does not contain tabular data ({detail})")
 
-        raise _ex.ETracInternal(f"Data item does not contain any usable data")
+        return item.content
 
     @classmethod
     def arrow_to_pandas(
  def arrow_to_pandas(
@@ -496,7 +496,10 @@ class CommonFileStorage(IFileStorage):
496
496
 
497
497
  # For successful write streams, log the total size written
498
498
  if is_write and not error:
499
- file_size = _util.format_file_size(stream.tell())
499
+ if not stream.closed:
500
+ file_size = _util.format_file_size(stream.tell())
501
+ else:
502
+ file_size = self._fs.get_file_info(storage_path).size
500
503
  self._log.info(f"File size [{self._key}]: {file_size} [{storage_path}]")
501
504
 
502
505
  # Close the stream - this may take time for write streams that are not flushed
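stream.tell() raises ValueError once a stream has been closed, so the new branch falls back to asking the filesystem for the final size. The same two paths on a plain local file, purely as an illustration (the real code goes through the PyArrow filesystem API via self._fs.get_file_info):

    import os

    with open("example.bin", "wb") as stream:
        stream.write(b"hello world")
        size_while_open = stream.tell()  # valid only while the stream is open

    size_after_close = os.path.getsize("example.bin")  # fallback: ask the filesystem
    print(size_while_open, size_after_close)  # 11 11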