vastdb 1.4.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ import ibis
2
+ import pyarrow as pa
3
+ from ibis.expr.types.structs import IbisError
4
+
5
+ from vastdb import errors
6
+
7
+
8
def validate_ibis_support_schema(arrow_schema: pa.Schema):
    """Validate that the provided Arrow schema is compatible with Ibis.

    Each field is checked independently by round-tripping a single-field
    schema through ``ibis.Schema.from_pyarrow``; every field that fails the
    conversion is collected so the error reports all offenders at once.

    Raises NotSupportedSchema if the schema contains unsupported fields.
    """
    unsupported_fields = []
    first_exception = None
    for field in arrow_schema:
        try:
            ibis.Schema.from_pyarrow(pa.schema([field]))
        except (IbisError, ValueError, KeyError) as exc:
            # Remember only the first failure as the exception's cause.
            first_exception = first_exception or exc
            unsupported_fields.append(field)

    if not unsupported_fields:
        return

    raise errors.NotSupportedSchema(
        message=f"Ibis does not support the schema {unsupported_fields=}",
        schema=arrow_schema,
        cause=first_exception
    )
vastdb/_internal.py CHANGED
@@ -7,7 +7,7 @@ import time
7
7
  import urllib.parse
8
8
  from collections import defaultdict, namedtuple
9
9
  from enum import Enum
10
- from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
10
+ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, cast
11
11
 
12
12
  import backoff
13
13
  import flatbuffers
@@ -52,6 +52,7 @@ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int8Literal as fb_
52
52
  import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int16Literal as fb_int16_lit
53
53
  import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int32Literal as fb_int32_lit
54
54
  import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int64Literal as fb_int64_lit
55
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.ListLiteral as fb_list_lit
55
56
  import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Literal as fb_literal
56
57
  import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Relation as fb_relation
57
58
  import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.RelationImpl as rel_impl
@@ -262,20 +263,32 @@ class Predicate:
262
263
  node = nodes_map[name]
263
264
  nodes_map = node.children_map
264
265
 
265
- # TODO: support predicate pushdown for leaf nodes (ORION-160338)
266
+ literal_field = node.field
267
+ literal_column_index = node.index
266
268
  if node.children:
267
- raise NotImplementedError(node.field) # no predicate pushdown for nested columns
268
- column_offset = self.build_column(position=node.index)
269
+ # Support fixed size list of a single flat child.
270
+ if pa.types.is_fixed_size_list(node.type) and len(node.children) == 1 and not node.children[
271
+ 0].children:
272
+ # Similar to projection, the index of the column should be the leaf which is the child's.
273
+ literal_column_index = node.children[0].index
274
+ # Set the literal type to be a list rather than fixed list since fixed list is not supported.
275
+ # https://github.com/apache/arrow/blob/apache-arrow-7.0.0/cpp/src/arrow/compute/exec/ir_consumer.cc#L287
276
+ literal_field = node.field.with_type(pa.list_(node.field.type.value_field))
277
+ else:
278
+ # TODO: support predicate pushdown for leaf nodes (ORION-160338)
279
+ raise NotImplementedError(node.field) # no predicate pushdown for nested columns
280
+
281
+ column_offset = self.build_column(position=literal_column_index)
269
282
  for literal in literals:
270
283
  args_offsets = [column_offset]
271
284
  if literal is not None:
272
- args_offsets.append(self.build_literal(field=node.field, value=literal.value))
285
+ args_offsets.append(self.build_literal_expression(field=literal_field, value=literal.value))
273
286
  if builder_func == self.build_between:
274
- args_offsets.append(self.build_literal(field=node.field, value=lower.value))
275
- args_offsets.append(self.build_literal(field=node.field, value=upper.value))
287
+ args_offsets.append(self.build_literal_expression(field=literal_field, value=lower.value))
288
+ args_offsets.append(self.build_literal_expression(field=literal_field, value=upper.value))
276
289
  if builder_func == self.build_starts_with:
277
- args_offsets.append(self.build_literal(field=node.field, value=lower_bytes))
278
- args_offsets.append(self.build_literal(field=node.field, value=upper_bytes))
290
+ args_offsets.append(self.build_literal_expression(field=literal_field, value=lower_bytes))
291
+ args_offsets.append(self.build_literal_expression(field=literal_field, value=upper_bytes))
279
292
 
280
293
  inner_offsets.append(builder_func(*args_offsets))
281
294
 
@@ -326,14 +339,14 @@ class Predicate:
326
339
  if isinstance(filter_by_name, tuple) and len(filter_by_name) == 1:
327
340
  op, value = self.rule_to_operator(filter_by_name[0])
328
341
  if value:
329
- literal = self.build_literal(field=field, value=value)
342
+ literal = self.build_literal_expression(field=field, value=value)
330
343
  return op(column, literal)
331
344
  return op(column) # is_null or is_not_null operation
332
345
 
333
346
  rules = []
334
347
  for rule in filter_by_name:
335
348
  op, value = self.rule_to_operator(rule)
336
- literal = self.build_literal(field=field, value=value)
349
+ literal = self.build_literal_expression(field=field, value=value)
337
350
  rules.append(op(column, literal))
338
351
 
339
352
  return self.build_and(rules)
@@ -359,145 +372,93 @@ class Predicate:
359
372
  # see https://github.com/apache/arrow/blob/main/format/Schema.fbs
360
373
  # https://github.com/apache/arrow/blob/apache-arrow-7.0.0/experimental/computeir/Expression.fbs
361
374
  # https://github.com/apache/arrow/blob/apache-arrow-7.0.0/experimental/computeir/Literal.fbs
362
- def build_literal(self, field: pa.Field, value):
363
- literal_type: Any
364
-
365
- if field.type.equals(pa.int64()) or field.type.equals(pa.uint64()):
366
- is_signed = field.type.equals(pa.int64())
367
- literal_type = fb_int64_lit if is_signed else fb_uint64_lit
368
- literal_impl = LiteralImpl.Int64Literal if is_signed else LiteralImpl.UInt64Literal
369
-
370
- field_type_type = Type.Int
371
- fb_int.Start(self.builder)
372
- fb_int.AddBitWidth(self.builder, field.type.bit_width)
373
- fb_int.AddIsSigned(self.builder, is_signed)
374
- field_type = fb_int.End(self.builder)
375
-
375
+ def build_literal_impl(self, pa_type: pa.DataType, value) -> Tuple[int, int]:
376
+ '''
377
+ Builds a LiteralImpl for the given Arrow type and value.
378
+ :param pa_type: Literal type as defined in Arrow.
379
+ :param value: Value to be used in the LiteralImpl.
380
+ :return: Tuple[LiteralImpl, buffer_value]
381
+ '''
382
+ if pa.types.is_integer(pa_type):
383
+ impl_type, impl_class = None, None
376
384
  value = int(value)
377
- elif field.type.equals(pa.int32()) or field.type.equals(pa.uint32()):
378
- is_signed = field.type.equals(pa.int32())
379
- literal_type = fb_int32_lit if is_signed else fb_uint32_lit
380
- literal_impl = LiteralImpl.Int32Literal if is_signed else LiteralImpl.UInt32Literal
381
385
 
382
- field_type_type = Type.Int
383
- fb_int.Start(self.builder)
384
- fb_int.AddBitWidth(self.builder, field.type.bit_width)
385
- fb_int.AddIsSigned(self.builder, is_signed)
386
- field_type = fb_int.End(self.builder)
386
+ if pa.types.is_int8(pa_type):
387
+ impl_type, impl_class = LiteralImpl.Int8Literal, fb_int8_lit
388
+ elif pa.types.is_uint8(pa_type):
389
+ impl_type, impl_class = LiteralImpl.UInt8Literal, fb_uint8_lit
390
+ elif pa.types.is_int16(pa_type):
391
+ impl_type, impl_class = LiteralImpl.Int16Literal, fb_int16_lit
392
+ elif pa.types.is_uint16(pa_type):
393
+ impl_type, impl_class = LiteralImpl.UInt16Literal, fb_uint16_lit
394
+ elif pa.types.is_int32(pa_type):
395
+ impl_type, impl_class = LiteralImpl.Int32Literal, fb_int32_lit
396
+ elif pa.types.is_uint32(pa_type):
397
+ impl_type, impl_class = LiteralImpl.UInt32Literal, fb_uint32_lit
398
+ elif pa.types.is_int64(pa_type):
399
+ impl_type, impl_class = LiteralImpl.Int64Literal, fb_int64_lit
400
+ elif pa.types.is_uint64(pa_type):
401
+ impl_type, impl_class = LiteralImpl.UInt64Literal, fb_uint64_lit
402
+ else:
403
+ raise ValueError(f'unsupported integer predicate type: {pa_type}, value={value}')
387
404
 
388
- value = int(value)
389
- elif field.type.equals(pa.int16()) or field.type.equals(pa.uint16()):
390
- is_signed = field.type.equals(pa.int16())
391
- literal_type = fb_int16_lit if is_signed else fb_uint16_lit
392
- literal_impl = LiteralImpl.Int16Literal if is_signed else LiteralImpl.UInt16Literal
405
+ impl_class.Start(self.builder)
406
+ impl_class.AddValue(self.builder, value)
407
+ buffer_value = impl_class.End(self.builder)
408
+ return impl_type, buffer_value
393
409
 
394
- field_type_type = Type.Int
395
- fb_int.Start(self.builder)
396
- fb_int.AddBitWidth(self.builder, field.type.bit_width)
397
- fb_int.AddIsSigned(self.builder, is_signed)
398
- field_type = fb_int.End(self.builder)
410
+ if pa.types.is_floating(pa_type):
411
+ impl_type, impl_class = None, None
412
+ value = float(value)
399
413
 
400
- value = int(value)
401
- elif field.type.equals(pa.int8()) or field.type.equals(pa.uint8()):
402
- is_signed = field.type.equals(pa.int8())
403
- literal_type = fb_int8_lit if is_signed else fb_uint8_lit
404
- literal_impl = LiteralImpl.Int8Literal if is_signed else LiteralImpl.UInt8Literal
414
+ if pa.types.is_float32(pa_type):
415
+ impl_type, impl_class = LiteralImpl.Float32Literal, fb_float32_lit
416
+ elif pa.types.is_float64(pa_type):
417
+ impl_type, impl_class = LiteralImpl.Float64Literal, fb_float64_lit
418
+ else:
419
+ # Float16 is not supported by Vast.
420
+ raise ValueError(f'unsupported floating point predicate type: {pa_type}, value={value}')
405
421
 
406
- field_type_type = Type.Int
407
- fb_int.Start(self.builder)
408
- fb_int.AddBitWidth(self.builder, field.type.bit_width)
409
- fb_int.AddIsSigned(self.builder, is_signed)
410
- field_type = fb_int.End(self.builder)
422
+ impl_class.Start(self.builder)
423
+ impl_class.AddValue(self.builder, value)
424
+ buffer_value = impl_class.End(self.builder)
425
+ return impl_type, buffer_value
411
426
 
412
- value = int(value)
413
- elif field.type.equals(pa.float32()):
414
- literal_type = fb_float32_lit
415
- literal_impl = LiteralImpl.Float32Literal
427
+ if pa_type.equals(pa.string()):
428
+ value = self.builder.CreateString(value)
416
429
 
417
- field_type_type = Type.FloatingPoint
418
- fb_floating_point.Start(self.builder)
419
- fb_floating_point.AddPrecision(self.builder, 1) # single
420
- field_type = fb_floating_point.End(self.builder)
430
+ fb_string_lit.Start(self.builder)
431
+ fb_string_lit.AddValue(self.builder, value)
432
+ buffer_value = fb_string_lit.End(self.builder)
433
+ return LiteralImpl.StringLiteral, buffer_value
421
434
 
422
- value = float(value)
423
- elif field.type.equals(pa.float64()):
424
- literal_type = fb_float64_lit
425
- literal_impl = LiteralImpl.Float64Literal
435
+ if pa_type.equals(pa.date32()): # pa.date64() is not supported
436
+ # Assuming units are in Days. Look at get_field_type for more details.
437
+ value, = pa.array([value], pa_type).cast(pa.int32()).to_pylist()
426
438
 
427
- field_type_type = Type.FloatingPoint
428
- fb_floating_point.Start(self.builder)
429
- fb_floating_point.AddPrecision(self.builder, 2) # double
430
- field_type = fb_floating_point.End(self.builder)
439
+ fb_date32_lit.Start(self.builder)
440
+ fb_date32_lit.AddValue(self.builder, value)
441
+ buffer_value = fb_date32_lit.End(self.builder)
442
+ return LiteralImpl.DateLiteral, buffer_value
431
443
 
432
- value = float(value)
433
- elif field.type.equals(pa.string()):
434
- literal_type = fb_string_lit
435
- literal_impl = LiteralImpl.StringLiteral
444
+ if pa.types.is_timestamp(pa_type):
445
+ value, = pa.array([value], pa_type).cast(pa.int64()).to_pylist()
436
446
 
437
- field_type_type = Type.Utf8
438
- fb_utf8.Start(self.builder)
439
- field_type = fb_utf8.End(self.builder)
447
+ fb_timestamp_lit.Start(self.builder)
448
+ fb_timestamp_lit.AddValue(self.builder, value)
449
+ buffer_value = fb_timestamp_lit.End(self.builder)
450
+ return LiteralImpl.TimestampLiteral, buffer_value
440
451
 
441
- value = self.builder.CreateString(value)
442
- elif field.type.equals(pa.date32()): # pa.date64() is not supported
443
- literal_type = fb_date32_lit
444
- literal_impl = LiteralImpl.DateLiteral
445
-
446
- field_type_type = Type.Date
447
- fb_date.Start(self.builder)
448
- fb_date.AddUnit(self.builder, DateUnit.DAY)
449
- field_type = fb_date.End(self.builder)
450
- value, = pa.array([value], field.type).cast(pa.int32()).to_pylist()
451
- elif isinstance(field.type, pa.TimestampType):
452
- literal_type = fb_timestamp_lit
453
- literal_impl = LiteralImpl.TimestampLiteral
454
-
455
- if field.type.equals(pa.timestamp('s')):
456
- unit = TimeUnit.SECOND
457
- if field.type.equals(pa.timestamp('ms')):
458
- unit = TimeUnit.MILLISECOND
459
- if field.type.equals(pa.timestamp('us')):
460
- unit = TimeUnit.MICROSECOND
461
- if field.type.equals(pa.timestamp('ns')):
462
- unit = TimeUnit.NANOSECOND
463
-
464
- field_type_type = Type.Timestamp
465
- fb_timestamp.Start(self.builder)
466
- fb_timestamp.AddUnit(self.builder, unit)
467
- field_type = fb_timestamp.End(self.builder)
468
- value, = pa.array([value], field.type).cast(pa.int64()).to_pylist()
469
- elif isinstance(field.type, (pa.Time32Type, pa.Time64Type)):
470
- literal_type = fb_time_lit
471
- literal_impl = LiteralImpl.TimeLiteral
472
-
473
- if field.type.equals(pa.time32('s')):
474
- target_type = pa.int32()
475
- unit = TimeUnit.SECOND
476
- if field.type.equals(pa.time32('ms')):
477
- target_type = pa.int32()
478
- unit = TimeUnit.MILLISECOND
479
- if field.type.equals(pa.time64('us')):
480
- target_type = pa.int64()
481
- unit = TimeUnit.MICROSECOND
482
- if field.type.equals(pa.time64('ns')):
483
- target_type = pa.int64()
484
- unit = TimeUnit.NANOSECOND
485
-
486
- field_type_type = Type.Time
487
- fb_time.Start(self.builder)
488
- fb_time.AddBitWidth(self.builder, field.type.bit_width)
489
- fb_time.AddUnit(self.builder, unit)
490
- field_type = fb_time.End(self.builder)
491
-
492
- value, = pa.array([value], field.type).cast(target_type).to_pylist()
493
- elif field.type.equals(pa.bool_()):
494
- literal_type = fb_bool_lit
495
- literal_impl = LiteralImpl.BooleanLiteral
496
-
497
- field_type_type = Type.Bool
498
- fb_bool.Start(self.builder)
499
- field_type = fb_bool.End(self.builder)
452
+ if pa.types.is_time(pa_type):
453
+ target_type = pa.int32() if pa.types.is_time32(pa_type) else pa.int64()
454
+ value, = pa.array([value], pa_type).cast(target_type).to_pylist()
500
455
 
456
+ fb_time_lit.Start(self.builder)
457
+ fb_time_lit.AddValue(self.builder, value)
458
+ buffer_value = fb_time_lit.End(self.builder)
459
+ return LiteralImpl.TimeLiteral, buffer_value
460
+
461
+ if pa_type.equals(pa.bool_()):
501
462
  # Handle both boolean values and string representations
502
463
  if isinstance(value, bool):
503
464
  value = value
@@ -505,46 +466,65 @@ class Predicate:
505
466
  value = value.lower() == 'true'
506
467
  else:
507
468
  value = bool(value)
508
- elif isinstance(field.type, pa.Decimal128Type):
509
- literal_type = fb_decimal_lit
510
- literal_impl = LiteralImpl.DecimalLiteral
511
-
512
- field_type_type = Type.Decimal
513
- fb_decimal.Start(self.builder)
514
- fb_decimal.AddPrecision(self.builder, field.type.precision)
515
- fb_decimal.AddScale(self.builder, field.type.scale)
516
- field_type = fb_decimal.End(self.builder)
517
- int_value = int(float(value) * 10 ** field.type.scale)
518
- binary_value = int_value.to_bytes(16, 'little')
519
469
 
470
+ fb_bool_lit.Start(self.builder)
471
+ fb_bool_lit.AddValue(self.builder, value)
472
+ buffer_value = fb_bool_lit.End(self.builder)
473
+ return LiteralImpl.BooleanLiteral, buffer_value
474
+
475
+ if pa.types.is_decimal128(pa_type):
476
+ int_value = int(float(value) * 10 ** pa_type.scale)
477
+ binary_value = int_value.to_bytes(16, 'little')
520
478
  value = self.builder.CreateByteVector(binary_value)
521
- elif field.type.equals(pa.binary()):
522
- literal_type = fb_binary_lit
523
- literal_impl = LiteralImpl.BinaryLiteral
524
479
 
525
- field_type_type = Type.Binary
526
- fb_binary.Start(self.builder)
527
- field_type = fb_binary.End(self.builder)
480
+ fb_decimal_lit.Start(self.builder)
481
+ fb_decimal_lit.AddValue(self.builder, value)
482
+ buffer_value = fb_decimal_lit.End(self.builder)
483
+ return LiteralImpl.DecimalLiteral, buffer_value
528
484
 
485
+ if pa_type.equals(pa.binary()):
529
486
  value = self.builder.CreateByteVector(value)
530
- else:
531
- raise ValueError(f'unsupported predicate for type={field.type}, value={value}')
532
487
 
533
- literal_type.Start(self.builder)
534
- literal_type.AddValue(self.builder, value)
535
- buffer_value = literal_type.End(self.builder)
488
+ fb_binary_lit.Start(self.builder)
489
+ fb_binary_lit.AddValue(self.builder, value)
490
+ buffer_value = fb_binary_lit.End(self.builder)
491
+ return LiteralImpl.BinaryLiteral, buffer_value
536
492
 
537
- fb_field.Start(self.builder)
538
- fb_field.AddTypeType(self.builder, field_type_type)
539
- fb_field.AddType(self.builder, field_type)
540
- buffer_field = fb_field.End(self.builder)
493
+ # pa.types.is_list is False for FixedSizeList which is important since parsing of FixedSizeList is not supported
494
+ # https://github.com/apache/arrow/blob/apache-arrow-7.0.0/cpp/src/arrow/compute/exec/ir_consumer.cc#L287
495
+ if pa.types.is_list(pa_type):
496
+ pa_type = cast(pa.FixedSizeListType, pa_type)
497
+
498
+ buffer_literals = []
499
+ for element in value:
500
+ buffer_literals.append(self.build_literal(pa_type.value_field, element))
501
+ fb_list_lit.StartValuesVector(self.builder, len(buffer_literals))
502
+ for offset in reversed(buffer_literals):
503
+ self.builder.PrependUOffsetTRelative(offset)
504
+ values_buffer = self.builder.EndVector()
505
+
506
+ fb_list_lit.Start(self.builder)
507
+ fb_list_lit.AddValues(self.builder, values_buffer)
508
+ buffer_value = fb_list_lit.End(self.builder)
509
+ return LiteralImpl.ListLiteral, buffer_value
510
+
511
+ raise ValueError(f'unsupported literal type={pa_type}, value={value}')
512
+
513
+ def build_literal(self, field: pa.Field, value) -> int:
514
+ literal_impl_type, literal_impl_buffer = self.build_literal_impl(field.type, value)
515
+
516
+ # Literal type should not contain name for more information
517
+ # https://github.com/apache/arrow/blob/apache-arrow-7.0.0/cpp/src/arrow/compute/exec/ir_consumer.cc#L326
518
+ field_buffer = build_field(self.builder, field, include_name=False)
541
519
 
542
520
  fb_literal.Start(self.builder)
543
- fb_literal.AddImplType(self.builder, literal_impl)
544
- fb_literal.AddImpl(self.builder, buffer_value)
545
- fb_literal.AddType(self.builder, buffer_field)
546
- buffer_literal = fb_literal.End(self.builder)
521
+ fb_literal.AddImplType(self.builder, literal_impl_type)
522
+ fb_literal.AddImpl(self.builder, literal_impl_buffer)
523
+ fb_literal.AddType(self.builder, field_buffer)
524
+ return fb_literal.End(self.builder)
547
525
 
526
+ def build_literal_expression(self, field: pa.Field, value) -> int:
527
+ buffer_literal = self.build_literal(field, value)
548
528
  fb_expression.Start(self.builder)
549
529
  fb_expression.AddImplType(self.builder, ExpressionImpl.Literal)
550
530
  fb_expression.AddImpl(self.builder, buffer_literal)
@@ -937,7 +917,7 @@ class VastdbApi:
937
917
  """Make sure that the connections closed."""
938
918
  self._session.close()
939
919
 
940
- def with_endpoint(self, endpoint):
920
+ def with_endpoint(self, endpoint) -> 'VastdbApi':
941
921
  """Open a new session for targeting a specific endpoint."""
942
922
  return VastdbApi(endpoint=endpoint,
943
923
  access_key=self.access_key,
@@ -1612,7 +1592,7 @@ class VastdbApi:
1612
1592
 
1613
1593
  return headers
1614
1594
 
1615
- def _build_query_data_url_params(self, projection, query_imports_table):
1595
+ def _build_query_data_url_params(self, projection: Optional[str], query_imports_table):
1616
1596
  if query_imports_table and projection:
1617
1597
  raise ValueError("Can't query both imports and projection table")
1618
1598
 
@@ -1624,8 +1604,8 @@ class VastdbApi:
1624
1604
  return url_params
1625
1605
 
1626
1606
  def query_data(self, bucket, schema, table, params, split=(0, 1, 8), num_sub_splits=1, response_row_id=False,
1627
- txid=0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
1628
- search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection='', enable_sorted_projections=True,
1607
+ txid: Optional[int] = 0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
1608
+ search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection: Optional[str] = '', enable_sorted_projections=True,
1629
1609
  request_format='string', response_format='string', query_imports_table=False):
1630
1610
  """
1631
1611
  GET /mybucket/myschema/mytable?data HTTP/1.1
@@ -2305,16 +2285,16 @@ def get_field_type(builder: flatbuffers.Builder, field: pa.Field):
2305
2285
  return field_type, field_type_type
2306
2286
 
2307
2287
 
2308
- def build_field(builder: flatbuffers.Builder, f: pa.Field, name: str):
2288
+ def build_field(builder: flatbuffers.Builder, f: pa.Field, include_name=True):
2309
2289
  children = None
2310
2290
  if isinstance(f.type, pa.StructType):
2311
- children = [build_field(builder, child, child.name) for child in list(f.type)]
2291
+ children = [build_field(builder, child, include_name) for child in list(f.type)]
2312
2292
  if pa.types.is_list(f.type) or pa.types.is_fixed_size_list(f.type):
2313
- children = [build_field(builder, f.type.value_field, "item")]
2293
+ children = [build_field(builder, f.type.value_field.with_name("item"), include_name)]
2314
2294
  if isinstance(f.type, pa.MapType):
2315
2295
  children = [
2316
- build_field(builder, f.type.key_field, "key"),
2317
- build_field(builder, f.type.item_field, "value"),
2296
+ build_field(builder, f.type.key_field.with_name("key"), include_name),
2297
+ build_field(builder, f.type.item_field.with_name("value"), include_name),
2318
2298
  ]
2319
2299
 
2320
2300
  # adding "entries" column:
@@ -2340,10 +2320,15 @@ def build_field(builder: flatbuffers.Builder, f: pa.Field, name: str):
2340
2320
  builder.PrependUOffsetTRelative(offset)
2341
2321
  children = builder.EndVector()
2342
2322
 
2343
- col_name = builder.CreateString(name)
2323
+ col_name = None
2324
+ if include_name:
2325
+ col_name = builder.CreateString(f.name)
2326
+
2344
2327
  field_type, field_type_type = get_field_type(builder, f)
2345
2328
  fb_field.Start(builder)
2346
- fb_field.AddName(builder, col_name)
2329
+ if col_name is not None:
2330
+ fb_field.AddName(builder, col_name)
2331
+ fb_field.AddNullable(builder, f.nullable)
2347
2332
  fb_field.AddTypeType(builder, field_type_type)
2348
2333
  fb_field.AddType(builder, field_type)
2349
2334
  if children is not None:
@@ -2358,19 +2343,21 @@ class QueryDataRequest:
2358
2343
  self.response_parser = response_parser
2359
2344
 
2360
2345
 
2361
def get_response_schema(schema: 'pa.Schema' = pa.schema([]), field_names: Optional[List[str]] = None) -> pa.Schema:
    """Return a schema restricted to *field_names*.

    When *field_names* is None, every field of *schema* is kept in its
    original order; otherwise fields are selected by name, in the order given.
    """
    names = field_names if field_names is not None else [f.name for f in schema]
    selected = [schema.field(name) for name in names]
    return pa.schema(selected)
2366
2351
 
2367
2352
 
2368
- def build_query_data_request(schema: 'pa.Schema' = pa.schema([]), predicate: ibis.expr.types.BooleanColumn = None, field_names: Optional[List[str]] = None):
2353
+ def build_query_data_request(schema: 'pa.Schema' = pa.schema([]),
2354
+ predicate: ibis.expr.types.BooleanColumn = None,
2355
+ field_names: Optional[List[str]] = None) -> QueryDataRequest:
2369
2356
  builder = flatbuffers.Builder(1024)
2370
2357
 
2371
2358
  source_name = builder.CreateString('') # required
2372
2359
 
2373
- fields = [build_field(builder, f, f.name) for f in schema]
2360
+ fields = [build_field(builder, f) for f in schema]
2374
2361
 
2375
2362
  fb_schema.StartFieldsVector(builder, len(fields))
2376
2363
  for offset in reversed(fields):
@@ -0,0 +1,136 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import TYPE_CHECKING, Iterable, Optional, Union
3
+
4
+ import ibis
5
+ import pyarrow as pa
6
+
7
+ from .config import ImportConfig, QueryConfig
8
+ from .table_metadata import TableRef
9
+
10
+ if TYPE_CHECKING:
11
+ from .table import Projection
12
+
13
+
14
class ITable(ABC):
    """Interface for VAST Table operations.

    Concrete implementations provide schema access, data import, query
    (with optional ibis predicate pushdown), and row-level mutation.
    """

    @property
    @abstractmethod
    def ref(self) -> TableRef:
        """Return the TableRef identifying this table."""
        pass

    @abstractmethod
    def __eq__(self, other: object) -> bool:
        """Compare this table to *other* for equality."""
        pass

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the table's name."""
        pass

    @property
    @abstractmethod
    def arrow_schema(self) -> pa.Schema:
        """Return the table's Arrow schema."""
        pass

    @property
    @abstractmethod
    def path(self) -> str:
        """Return the table's path."""
        pass

    @abstractmethod
    def sorted_columns(self) -> list[str]:
        """Return the names of the table's sorted columns."""
        pass

    @abstractmethod
    def projection(self, name: str) -> "Projection":
        """Get a specific semi-sorted projection of this table."""
        pass

    @abstractmethod
    def projections(self, projection_name: str = "") -> Iterable["Projection"]:
        """List semi-sorted projections; an empty name lists all of them."""
        pass

    @abstractmethod
    def import_files(self, files_to_import: Iterable[str], config: Optional[ImportConfig] = None) -> None:
        """Import the given files into this table."""
        pass

    @abstractmethod
    def import_partitioned_files(self, files_and_partitions: dict[str, pa.RecordBatch], config: Optional[ImportConfig] = None) -> None:
        """Import partitioned files, mapping each file to its partition batch."""
        pass

    @abstractmethod
    def select(self,
               columns: Optional[list[str]] = None,
               predicate: Union[ibis.expr.types.BooleanColumn,
                                ibis.common.deferred.Deferred] = None,
               config: Optional[QueryConfig] = None,
               *,
               internal_row_id: bool = False,
               limit_rows: Optional[int] = None) -> pa.RecordBatchReader:
        """Execute a query and stream the results.

        :param columns: Column names to project; None selects all columns.
        :param predicate: Optional ibis boolean expression for filter pushdown.
        :param config: Optional query tuning configuration.
        :param internal_row_id: Include the internal row-id column if True.
        :param limit_rows: Optional cap on the number of returned rows.
        :return: A reader over the resulting record batches.
        """
        pass

    @abstractmethod
    def insert(self, rows: Union[pa.RecordBatch, pa.Table]) -> pa.ChunkedArray:
        """Insert rows into the table."""
        pass

    @abstractmethod
    def update(self,
               rows: Union[pa.RecordBatch, pa.Table],
               columns: Optional[list[str]] = None) -> None:
        """Update rows in the table, optionally restricted to *columns*."""
        pass

    @abstractmethod
    def delete(self, rows: Union[pa.RecordBatch, pa.Table]) -> None:
        """Delete the given rows from the table."""
        pass

    @abstractmethod
    def imports_table(self) -> Optional["ITable"]:
        """Return the associated imports table, if any."""
        pass

    @abstractmethod
    def sorting_done(self) -> bool:
        """Return True when background sorting has completed."""
        pass

    @abstractmethod
    def sorting_score(self) -> int:
        """Return the table's current sorting score."""
        pass

    @abstractmethod
    def reload_schema(self) -> None:
        """Reload the table's Arrow schema from the server."""
        pass

    @abstractmethod
    def reload_stats(self) -> None:
        """Reload the table's statistics."""
        pass

    @abstractmethod
    def reload_sorted_columns(self) -> None:
        """Reload the table's sorted-column list."""
        pass

    @abstractmethod
    def __getitem__(self, col_name: str) -> ibis.Column:
        """Allow constructing ibis-like column expressions from this table.

        It is useful for constructing expressions for predicate pushdown in `ITable.select()` method.
        """
        pass
@@ -113,7 +113,7 @@ def calculate_aggregate_stats(
113
113
  )
114
114
  agg_df["duration_sec"] = (
115
115
  r_df.groupby(group_flds)
116
- .apply(calc_total_time_coverage_seconds, include_groups=False)
116
+ .apply(calc_total_time_coverage_seconds, include_groups=False) # type: ignore
117
117
  .sort_index()
118
118
  )
119
119
  agg_df["M_rows_per_sec"] = (agg_df["n_rows"] / agg_df["duration_sec"] / 1e6).astype(