vastdb 1.4.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/_ibis_support.py +28 -0
- vastdb/_internal.py +167 -180
- vastdb/_table_interface.py +136 -0
- vastdb/bench/perf_bench/orchestrate/results_helpers.py +1 -1
- vastdb/bucket.py +1 -1
- vastdb/conftest.py +42 -19
- vastdb/schema.py +15 -3
- vastdb/session.py +3 -1
- vastdb/table.py +595 -340
- vastdb/table_metadata.py +221 -0
- vastdb/tests/test_duckdb.py +30 -30
- vastdb/tests/test_fixed_list.py +56 -6
- vastdb/tests/test_imports.py +2 -1
- vastdb/tests/test_nested.py +0 -5
- vastdb/tests/test_table_in_tx.py +249 -0
- vastdb/tests/test_tables.py +57 -11
- vastdb/tests/util.py +98 -1
- vastdb/transaction.py +27 -0
- {vastdb-1.4.0.dist-info → vastdb-2.0.0.dist-info}/METADATA +21 -6
- {vastdb-1.4.0.dist-info → vastdb-2.0.0.dist-info}/RECORD +23 -19
- {vastdb-1.4.0.dist-info → vastdb-2.0.0.dist-info}/WHEEL +1 -1
- {vastdb-1.4.0.dist-info → vastdb-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {vastdb-1.4.0.dist-info → vastdb-2.0.0.dist-info}/top_level.txt +0 -0
vastdb/_ibis_support.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import ibis
|
|
2
|
+
import pyarrow as pa
|
|
3
|
+
from ibis.expr.types.structs import IbisError
|
|
4
|
+
|
|
5
|
+
from vastdb import errors
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def validate_ibis_support_schema(arrow_schema: pa.Schema):
    """Check that every field of an Arrow schema can be converted by Ibis.

    Each field is wrapped in its own single-field schema and passed to
    ``ibis.Schema.from_pyarrow`` so that all offending fields are collected,
    not only the first one encountered.

    :param arrow_schema: Arrow schema to validate.
    :raises errors.NotSupportedSchema: if at least one field fails the
        conversion; the first conversion error is passed as ``cause``.
    """
    # (field, exception) pairs, in schema order, for every failed conversion.
    failures = []
    for field in arrow_schema:
        try:
            ibis.Schema.from_pyarrow(pa.schema([field]))
        except (IbisError, ValueError, KeyError) as exc:
            failures.append((field, exc))

    if not failures:
        return

    # The name ``unsupported_fields`` is part of the rendered message below
    # (the ``=`` f-string specifier prints the expression text).
    unsupported_fields = [field for field, _ in failures]
    _, first_exception = failures[0]
    raise errors.NotSupportedSchema(
        message=f"Ibis does not support the schema {unsupported_fields=}",
        schema=arrow_schema,
        cause=first_exception,
    )
|
vastdb/_internal.py
CHANGED
|
@@ -7,7 +7,7 @@ import time
|
|
|
7
7
|
import urllib.parse
|
|
8
8
|
from collections import defaultdict, namedtuple
|
|
9
9
|
from enum import Enum
|
|
10
|
-
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
|
10
|
+
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, cast
|
|
11
11
|
|
|
12
12
|
import backoff
|
|
13
13
|
import flatbuffers
|
|
@@ -52,6 +52,7 @@ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int8Literal as fb_
|
|
|
52
52
|
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int16Literal as fb_int16_lit
|
|
53
53
|
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int32Literal as fb_int32_lit
|
|
54
54
|
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int64Literal as fb_int64_lit
|
|
55
|
+
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.ListLiteral as fb_list_lit
|
|
55
56
|
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Literal as fb_literal
|
|
56
57
|
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Relation as fb_relation
|
|
57
58
|
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.RelationImpl as rel_impl
|
|
@@ -262,20 +263,32 @@ class Predicate:
|
|
|
262
263
|
node = nodes_map[name]
|
|
263
264
|
nodes_map = node.children_map
|
|
264
265
|
|
|
265
|
-
|
|
266
|
+
literal_field = node.field
|
|
267
|
+
literal_column_index = node.index
|
|
266
268
|
if node.children:
|
|
267
|
-
|
|
268
|
-
|
|
269
|
+
# Support fixed size list of a single flat child.
|
|
270
|
+
if pa.types.is_fixed_size_list(node.type) and len(node.children) == 1 and not node.children[
|
|
271
|
+
0].children:
|
|
272
|
+
# Similar to projection, the index of the column should be the leaf which is the child's.
|
|
273
|
+
literal_column_index = node.children[0].index
|
|
274
|
+
# Set the literal type to be a list rather than fixed list since fixed list is not supported.
|
|
275
|
+
# https://github.com/apache/arrow/blob/apache-arrow-7.0.0/cpp/src/arrow/compute/exec/ir_consumer.cc#L287
|
|
276
|
+
literal_field = node.field.with_type(pa.list_(node.field.type.value_field))
|
|
277
|
+
else:
|
|
278
|
+
# TODO: support predicate pushdown for leaf nodes (ORION-160338)
|
|
279
|
+
raise NotImplementedError(node.field) # no predicate pushdown for nested columns
|
|
280
|
+
|
|
281
|
+
column_offset = self.build_column(position=literal_column_index)
|
|
269
282
|
for literal in literals:
|
|
270
283
|
args_offsets = [column_offset]
|
|
271
284
|
if literal is not None:
|
|
272
|
-
args_offsets.append(self.
|
|
285
|
+
args_offsets.append(self.build_literal_expression(field=literal_field, value=literal.value))
|
|
273
286
|
if builder_func == self.build_between:
|
|
274
|
-
args_offsets.append(self.
|
|
275
|
-
args_offsets.append(self.
|
|
287
|
+
args_offsets.append(self.build_literal_expression(field=literal_field, value=lower.value))
|
|
288
|
+
args_offsets.append(self.build_literal_expression(field=literal_field, value=upper.value))
|
|
276
289
|
if builder_func == self.build_starts_with:
|
|
277
|
-
args_offsets.append(self.
|
|
278
|
-
args_offsets.append(self.
|
|
290
|
+
args_offsets.append(self.build_literal_expression(field=literal_field, value=lower_bytes))
|
|
291
|
+
args_offsets.append(self.build_literal_expression(field=literal_field, value=upper_bytes))
|
|
279
292
|
|
|
280
293
|
inner_offsets.append(builder_func(*args_offsets))
|
|
281
294
|
|
|
@@ -326,14 +339,14 @@ class Predicate:
|
|
|
326
339
|
if isinstance(filter_by_name, tuple) and len(filter_by_name) == 1:
|
|
327
340
|
op, value = self.rule_to_operator(filter_by_name[0])
|
|
328
341
|
if value:
|
|
329
|
-
literal = self.
|
|
342
|
+
literal = self.build_literal_expression(field=field, value=value)
|
|
330
343
|
return op(column, literal)
|
|
331
344
|
return op(column) # is_null or is_not_null operation
|
|
332
345
|
|
|
333
346
|
rules = []
|
|
334
347
|
for rule in filter_by_name:
|
|
335
348
|
op, value = self.rule_to_operator(rule)
|
|
336
|
-
literal = self.
|
|
349
|
+
literal = self.build_literal_expression(field=field, value=value)
|
|
337
350
|
rules.append(op(column, literal))
|
|
338
351
|
|
|
339
352
|
return self.build_and(rules)
|
|
@@ -359,145 +372,93 @@ class Predicate:
|
|
|
359
372
|
# see https://github.com/apache/arrow/blob/main/format/Schema.fbs
|
|
360
373
|
# https://github.com/apache/arrow/blob/apache-arrow-7.0.0/experimental/computeir/Expression.fbs
|
|
361
374
|
# https://github.com/apache/arrow/blob/apache-arrow-7.0.0/experimental/computeir/Literal.fbs
|
|
362
|
-
def
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
fb_int.Start(self.builder)
|
|
372
|
-
fb_int.AddBitWidth(self.builder, field.type.bit_width)
|
|
373
|
-
fb_int.AddIsSigned(self.builder, is_signed)
|
|
374
|
-
field_type = fb_int.End(self.builder)
|
|
375
|
-
|
|
375
|
+
def build_literal_impl(self, pa_type: pa.DataType, value) -> Tuple[int, int]:
|
|
376
|
+
'''
|
|
377
|
+
Builds a LiteralImpl for the given Arrow type and value.
|
|
378
|
+
:param pa_type: Literal type as defined in Arrow.
|
|
379
|
+
:param value: Value to be used in the LiteralImpl.
|
|
380
|
+
:return: Tuple[LiteralImpl, buffer_value]
|
|
381
|
+
'''
|
|
382
|
+
if pa.types.is_integer(pa_type):
|
|
383
|
+
impl_type, impl_class = None, None
|
|
376
384
|
value = int(value)
|
|
377
|
-
elif field.type.equals(pa.int32()) or field.type.equals(pa.uint32()):
|
|
378
|
-
is_signed = field.type.equals(pa.int32())
|
|
379
|
-
literal_type = fb_int32_lit if is_signed else fb_uint32_lit
|
|
380
|
-
literal_impl = LiteralImpl.Int32Literal if is_signed else LiteralImpl.UInt32Literal
|
|
381
385
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
386
|
+
if pa.types.is_int8(pa_type):
|
|
387
|
+
impl_type, impl_class = LiteralImpl.Int8Literal, fb_int8_lit
|
|
388
|
+
elif pa.types.is_uint8(pa_type):
|
|
389
|
+
impl_type, impl_class = LiteralImpl.UInt8Literal, fb_uint8_lit
|
|
390
|
+
elif pa.types.is_int16(pa_type):
|
|
391
|
+
impl_type, impl_class = LiteralImpl.Int16Literal, fb_int16_lit
|
|
392
|
+
elif pa.types.is_uint16(pa_type):
|
|
393
|
+
impl_type, impl_class = LiteralImpl.UInt16Literal, fb_uint16_lit
|
|
394
|
+
elif pa.types.is_int32(pa_type):
|
|
395
|
+
impl_type, impl_class = LiteralImpl.Int32Literal, fb_int32_lit
|
|
396
|
+
elif pa.types.is_uint32(pa_type):
|
|
397
|
+
impl_type, impl_class = LiteralImpl.UInt32Literal, fb_uint32_lit
|
|
398
|
+
elif pa.types.is_int64(pa_type):
|
|
399
|
+
impl_type, impl_class = LiteralImpl.Int64Literal, fb_int64_lit
|
|
400
|
+
elif pa.types.is_uint64(pa_type):
|
|
401
|
+
impl_type, impl_class = LiteralImpl.UInt64Literal, fb_uint64_lit
|
|
402
|
+
else:
|
|
403
|
+
raise ValueError(f'unsupported integer predicate type: {pa_type}, value={value}')
|
|
387
404
|
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
literal_impl = LiteralImpl.Int16Literal if is_signed else LiteralImpl.UInt16Literal
|
|
405
|
+
impl_class.Start(self.builder)
|
|
406
|
+
impl_class.AddValue(self.builder, value)
|
|
407
|
+
buffer_value = impl_class.End(self.builder)
|
|
408
|
+
return impl_type, buffer_value
|
|
393
409
|
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
fb_int.AddIsSigned(self.builder, is_signed)
|
|
398
|
-
field_type = fb_int.End(self.builder)
|
|
410
|
+
if pa.types.is_floating(pa_type):
|
|
411
|
+
impl_type, impl_class = None, None
|
|
412
|
+
value = float(value)
|
|
399
413
|
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
414
|
+
if pa.types.is_float32(pa_type):
|
|
415
|
+
impl_type, impl_class = LiteralImpl.Float32Literal, fb_float32_lit
|
|
416
|
+
elif pa.types.is_float64(pa_type):
|
|
417
|
+
impl_type, impl_class = LiteralImpl.Float64Literal, fb_float64_lit
|
|
418
|
+
else:
|
|
419
|
+
# Float16 is not supported by Vast.
|
|
420
|
+
raise ValueError(f'unsupported floating point predicate type: {pa_type}, value={value}')
|
|
405
421
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
field_type = fb_int.End(self.builder)
|
|
422
|
+
impl_class.Start(self.builder)
|
|
423
|
+
impl_class.AddValue(self.builder, value)
|
|
424
|
+
buffer_value = impl_class.End(self.builder)
|
|
425
|
+
return impl_type, buffer_value
|
|
411
426
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
literal_type = fb_float32_lit
|
|
415
|
-
literal_impl = LiteralImpl.Float32Literal
|
|
427
|
+
if pa_type.equals(pa.string()):
|
|
428
|
+
value = self.builder.CreateString(value)
|
|
416
429
|
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
430
|
+
fb_string_lit.Start(self.builder)
|
|
431
|
+
fb_string_lit.AddValue(self.builder, value)
|
|
432
|
+
buffer_value = fb_string_lit.End(self.builder)
|
|
433
|
+
return LiteralImpl.StringLiteral, buffer_value
|
|
421
434
|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
literal_impl = LiteralImpl.Float64Literal
|
|
435
|
+
if pa_type.equals(pa.date32()): # pa.date64() is not supported
|
|
436
|
+
# Assuming units are in Days. Look at get_field_type for more details.
|
|
437
|
+
value, = pa.array([value], pa_type).cast(pa.int32()).to_pylist()
|
|
426
438
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
439
|
+
fb_date32_lit.Start(self.builder)
|
|
440
|
+
fb_date32_lit.AddValue(self.builder, value)
|
|
441
|
+
buffer_value = fb_date32_lit.End(self.builder)
|
|
442
|
+
return LiteralImpl.DateLiteral, buffer_value
|
|
431
443
|
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
literal_type = fb_string_lit
|
|
435
|
-
literal_impl = LiteralImpl.StringLiteral
|
|
444
|
+
if pa.types.is_timestamp(pa_type):
|
|
445
|
+
value, = pa.array([value], pa_type).cast(pa.int64()).to_pylist()
|
|
436
446
|
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
447
|
+
fb_timestamp_lit.Start(self.builder)
|
|
448
|
+
fb_timestamp_lit.AddValue(self.builder, value)
|
|
449
|
+
buffer_value = fb_timestamp_lit.End(self.builder)
|
|
450
|
+
return LiteralImpl.TimestampLiteral, buffer_value
|
|
440
451
|
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
literal_impl = LiteralImpl.DateLiteral
|
|
445
|
-
|
|
446
|
-
field_type_type = Type.Date
|
|
447
|
-
fb_date.Start(self.builder)
|
|
448
|
-
fb_date.AddUnit(self.builder, DateUnit.DAY)
|
|
449
|
-
field_type = fb_date.End(self.builder)
|
|
450
|
-
value, = pa.array([value], field.type).cast(pa.int32()).to_pylist()
|
|
451
|
-
elif isinstance(field.type, pa.TimestampType):
|
|
452
|
-
literal_type = fb_timestamp_lit
|
|
453
|
-
literal_impl = LiteralImpl.TimestampLiteral
|
|
454
|
-
|
|
455
|
-
if field.type.equals(pa.timestamp('s')):
|
|
456
|
-
unit = TimeUnit.SECOND
|
|
457
|
-
if field.type.equals(pa.timestamp('ms')):
|
|
458
|
-
unit = TimeUnit.MILLISECOND
|
|
459
|
-
if field.type.equals(pa.timestamp('us')):
|
|
460
|
-
unit = TimeUnit.MICROSECOND
|
|
461
|
-
if field.type.equals(pa.timestamp('ns')):
|
|
462
|
-
unit = TimeUnit.NANOSECOND
|
|
463
|
-
|
|
464
|
-
field_type_type = Type.Timestamp
|
|
465
|
-
fb_timestamp.Start(self.builder)
|
|
466
|
-
fb_timestamp.AddUnit(self.builder, unit)
|
|
467
|
-
field_type = fb_timestamp.End(self.builder)
|
|
468
|
-
value, = pa.array([value], field.type).cast(pa.int64()).to_pylist()
|
|
469
|
-
elif isinstance(field.type, (pa.Time32Type, pa.Time64Type)):
|
|
470
|
-
literal_type = fb_time_lit
|
|
471
|
-
literal_impl = LiteralImpl.TimeLiteral
|
|
472
|
-
|
|
473
|
-
if field.type.equals(pa.time32('s')):
|
|
474
|
-
target_type = pa.int32()
|
|
475
|
-
unit = TimeUnit.SECOND
|
|
476
|
-
if field.type.equals(pa.time32('ms')):
|
|
477
|
-
target_type = pa.int32()
|
|
478
|
-
unit = TimeUnit.MILLISECOND
|
|
479
|
-
if field.type.equals(pa.time64('us')):
|
|
480
|
-
target_type = pa.int64()
|
|
481
|
-
unit = TimeUnit.MICROSECOND
|
|
482
|
-
if field.type.equals(pa.time64('ns')):
|
|
483
|
-
target_type = pa.int64()
|
|
484
|
-
unit = TimeUnit.NANOSECOND
|
|
485
|
-
|
|
486
|
-
field_type_type = Type.Time
|
|
487
|
-
fb_time.Start(self.builder)
|
|
488
|
-
fb_time.AddBitWidth(self.builder, field.type.bit_width)
|
|
489
|
-
fb_time.AddUnit(self.builder, unit)
|
|
490
|
-
field_type = fb_time.End(self.builder)
|
|
491
|
-
|
|
492
|
-
value, = pa.array([value], field.type).cast(target_type).to_pylist()
|
|
493
|
-
elif field.type.equals(pa.bool_()):
|
|
494
|
-
literal_type = fb_bool_lit
|
|
495
|
-
literal_impl = LiteralImpl.BooleanLiteral
|
|
496
|
-
|
|
497
|
-
field_type_type = Type.Bool
|
|
498
|
-
fb_bool.Start(self.builder)
|
|
499
|
-
field_type = fb_bool.End(self.builder)
|
|
452
|
+
if pa.types.is_time(pa_type):
|
|
453
|
+
target_type = pa.int32() if pa.types.is_time32(pa_type) else pa.int64()
|
|
454
|
+
value, = pa.array([value], pa_type).cast(target_type).to_pylist()
|
|
500
455
|
|
|
456
|
+
fb_time_lit.Start(self.builder)
|
|
457
|
+
fb_time_lit.AddValue(self.builder, value)
|
|
458
|
+
buffer_value = fb_time_lit.End(self.builder)
|
|
459
|
+
return LiteralImpl.TimeLiteral, buffer_value
|
|
460
|
+
|
|
461
|
+
if pa_type.equals(pa.bool_()):
|
|
501
462
|
# Handle both boolean values and string representations
|
|
502
463
|
if isinstance(value, bool):
|
|
503
464
|
value = value
|
|
@@ -505,46 +466,65 @@ class Predicate:
|
|
|
505
466
|
value = value.lower() == 'true'
|
|
506
467
|
else:
|
|
507
468
|
value = bool(value)
|
|
508
|
-
elif isinstance(field.type, pa.Decimal128Type):
|
|
509
|
-
literal_type = fb_decimal_lit
|
|
510
|
-
literal_impl = LiteralImpl.DecimalLiteral
|
|
511
|
-
|
|
512
|
-
field_type_type = Type.Decimal
|
|
513
|
-
fb_decimal.Start(self.builder)
|
|
514
|
-
fb_decimal.AddPrecision(self.builder, field.type.precision)
|
|
515
|
-
fb_decimal.AddScale(self.builder, field.type.scale)
|
|
516
|
-
field_type = fb_decimal.End(self.builder)
|
|
517
|
-
int_value = int(float(value) * 10 ** field.type.scale)
|
|
518
|
-
binary_value = int_value.to_bytes(16, 'little')
|
|
519
469
|
|
|
470
|
+
fb_bool_lit.Start(self.builder)
|
|
471
|
+
fb_bool_lit.AddValue(self.builder, value)
|
|
472
|
+
buffer_value = fb_bool_lit.End(self.builder)
|
|
473
|
+
return LiteralImpl.BooleanLiteral, buffer_value
|
|
474
|
+
|
|
475
|
+
if pa.types.is_decimal128(pa_type):
|
|
476
|
+
int_value = int(float(value) * 10 ** pa_type.scale)
|
|
477
|
+
binary_value = int_value.to_bytes(16, 'little')
|
|
520
478
|
value = self.builder.CreateByteVector(binary_value)
|
|
521
|
-
elif field.type.equals(pa.binary()):
|
|
522
|
-
literal_type = fb_binary_lit
|
|
523
|
-
literal_impl = LiteralImpl.BinaryLiteral
|
|
524
479
|
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
480
|
+
fb_decimal_lit.Start(self.builder)
|
|
481
|
+
fb_decimal_lit.AddValue(self.builder, value)
|
|
482
|
+
buffer_value = fb_decimal_lit.End(self.builder)
|
|
483
|
+
return LiteralImpl.DecimalLiteral, buffer_value
|
|
528
484
|
|
|
485
|
+
if pa_type.equals(pa.binary()):
|
|
529
486
|
value = self.builder.CreateByteVector(value)
|
|
530
|
-
else:
|
|
531
|
-
raise ValueError(f'unsupported predicate for type={field.type}, value={value}')
|
|
532
487
|
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
488
|
+
fb_binary_lit.Start(self.builder)
|
|
489
|
+
fb_binary_lit.AddValue(self.builder, value)
|
|
490
|
+
buffer_value = fb_binary_lit.End(self.builder)
|
|
491
|
+
return LiteralImpl.BinaryLiteral, buffer_value
|
|
536
492
|
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
493
|
+
# pa.types.is_list is False for FixedSizeList which is important since parsing of FixedSizeList is not supported
|
|
494
|
+
# https://github.com/apache/arrow/blob/apache-arrow-7.0.0/cpp/src/arrow/compute/exec/ir_consumer.cc#L287
|
|
495
|
+
if pa.types.is_list(pa_type):
|
|
496
|
+
pa_type = cast(pa.FixedSizeListType, pa_type)
|
|
497
|
+
|
|
498
|
+
buffer_literals = []
|
|
499
|
+
for element in value:
|
|
500
|
+
buffer_literals.append(self.build_literal(pa_type.value_field, element))
|
|
501
|
+
fb_list_lit.StartValuesVector(self.builder, len(buffer_literals))
|
|
502
|
+
for offset in reversed(buffer_literals):
|
|
503
|
+
self.builder.PrependUOffsetTRelative(offset)
|
|
504
|
+
values_buffer = self.builder.EndVector()
|
|
505
|
+
|
|
506
|
+
fb_list_lit.Start(self.builder)
|
|
507
|
+
fb_list_lit.AddValues(self.builder, values_buffer)
|
|
508
|
+
buffer_value = fb_list_lit.End(self.builder)
|
|
509
|
+
return LiteralImpl.ListLiteral, buffer_value
|
|
510
|
+
|
|
511
|
+
raise ValueError(f'unsupported literal type={pa_type}, value={value}')
|
|
512
|
+
|
|
513
|
+
def build_literal(self, field: pa.Field, value) -> int:
    """Serialize ``value`` as a flatbuffer ``Literal`` typed by ``field``.

    :param field: Arrow field whose type selects the literal encoding.
    :param value: Value to encode.
    :return: The result of ``fb_literal.End`` for the finished ``Literal``.
    """
    # The LiteralImpl and the type description are both built before the
    # Literal table itself is started.
    impl_kind, impl_offset = self.build_literal_impl(field.type, value)

    # The literal's type is serialized without a field name; for more
    # information see
    # https://github.com/apache/arrow/blob/apache-arrow-7.0.0/cpp/src/arrow/compute/exec/ir_consumer.cc#L326
    type_offset = build_field(self.builder, field, include_name=False)

    builder = self.builder
    fb_literal.Start(builder)
    fb_literal.AddImplType(builder, impl_kind)
    fb_literal.AddImpl(builder, impl_offset)
    fb_literal.AddType(builder, type_offset)
    return fb_literal.End(builder)
|
|
547
525
|
|
|
526
|
+
def build_literal_expression(self, field: pa.Field, value) -> int:
|
|
527
|
+
buffer_literal = self.build_literal(field, value)
|
|
548
528
|
fb_expression.Start(self.builder)
|
|
549
529
|
fb_expression.AddImplType(self.builder, ExpressionImpl.Literal)
|
|
550
530
|
fb_expression.AddImpl(self.builder, buffer_literal)
|
|
@@ -937,7 +917,7 @@ class VastdbApi:
|
|
|
937
917
|
"""Make sure that the connections closed."""
|
|
938
918
|
self._session.close()
|
|
939
919
|
|
|
940
|
-
def with_endpoint(self, endpoint):
|
|
920
|
+
def with_endpoint(self, endpoint) -> 'VastdbApi':
|
|
941
921
|
"""Open a new session for targeting a specific endpoint."""
|
|
942
922
|
return VastdbApi(endpoint=endpoint,
|
|
943
923
|
access_key=self.access_key,
|
|
@@ -1612,7 +1592,7 @@ class VastdbApi:
|
|
|
1612
1592
|
|
|
1613
1593
|
return headers
|
|
1614
1594
|
|
|
1615
|
-
def _build_query_data_url_params(self, projection, query_imports_table):
|
|
1595
|
+
def _build_query_data_url_params(self, projection: Optional[str], query_imports_table):
|
|
1616
1596
|
if query_imports_table and projection:
|
|
1617
1597
|
raise ValueError("Can't query both imports and projection table")
|
|
1618
1598
|
|
|
@@ -1624,8 +1604,8 @@ class VastdbApi:
|
|
|
1624
1604
|
return url_params
|
|
1625
1605
|
|
|
1626
1606
|
def query_data(self, bucket, schema, table, params, split=(0, 1, 8), num_sub_splits=1, response_row_id=False,
|
|
1627
|
-
txid=0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
|
|
1628
|
-
search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection='', enable_sorted_projections=True,
|
|
1607
|
+
txid: Optional[int] = 0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
|
|
1608
|
+
search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection: Optional[str] = '', enable_sorted_projections=True,
|
|
1629
1609
|
request_format='string', response_format='string', query_imports_table=False):
|
|
1630
1610
|
"""
|
|
1631
1611
|
GET /mybucket/myschema/mytable?data HTTP/1.1
|
|
@@ -2305,16 +2285,16 @@ def get_field_type(builder: flatbuffers.Builder, field: pa.Field):
|
|
|
2305
2285
|
return field_type, field_type_type
|
|
2306
2286
|
|
|
2307
2287
|
|
|
2308
|
-
def build_field(builder: flatbuffers.Builder, f: pa.Field,
|
|
2288
|
+
def build_field(builder: flatbuffers.Builder, f: pa.Field, include_name=True):
|
|
2309
2289
|
children = None
|
|
2310
2290
|
if isinstance(f.type, pa.StructType):
|
|
2311
|
-
children = [build_field(builder, child,
|
|
2291
|
+
children = [build_field(builder, child, include_name) for child in list(f.type)]
|
|
2312
2292
|
if pa.types.is_list(f.type) or pa.types.is_fixed_size_list(f.type):
|
|
2313
|
-
children = [build_field(builder, f.type.value_field
|
|
2293
|
+
children = [build_field(builder, f.type.value_field.with_name("item"), include_name)]
|
|
2314
2294
|
if isinstance(f.type, pa.MapType):
|
|
2315
2295
|
children = [
|
|
2316
|
-
build_field(builder, f.type.key_field
|
|
2317
|
-
build_field(builder, f.type.item_field
|
|
2296
|
+
build_field(builder, f.type.key_field.with_name("key"), include_name),
|
|
2297
|
+
build_field(builder, f.type.item_field.with_name("value"), include_name),
|
|
2318
2298
|
]
|
|
2319
2299
|
|
|
2320
2300
|
# adding "entries" column:
|
|
@@ -2340,10 +2320,15 @@ def build_field(builder: flatbuffers.Builder, f: pa.Field, name: str):
|
|
|
2340
2320
|
builder.PrependUOffsetTRelative(offset)
|
|
2341
2321
|
children = builder.EndVector()
|
|
2342
2322
|
|
|
2343
|
-
col_name =
|
|
2323
|
+
col_name = None
|
|
2324
|
+
if include_name:
|
|
2325
|
+
col_name = builder.CreateString(f.name)
|
|
2326
|
+
|
|
2344
2327
|
field_type, field_type_type = get_field_type(builder, f)
|
|
2345
2328
|
fb_field.Start(builder)
|
|
2346
|
-
|
|
2329
|
+
if col_name is not None:
|
|
2330
|
+
fb_field.AddName(builder, col_name)
|
|
2331
|
+
fb_field.AddNullable(builder, f.nullable)
|
|
2347
2332
|
fb_field.AddTypeType(builder, field_type_type)
|
|
2348
2333
|
fb_field.AddType(builder, field_type)
|
|
2349
2334
|
if children is not None:
|
|
@@ -2358,19 +2343,21 @@ class QueryDataRequest:
|
|
|
2358
2343
|
self.response_parser = response_parser
|
|
2359
2344
|
|
|
2360
2345
|
|
|
2361
|
-
def get_response_schema(schema: 'pa.Schema' = pa.schema([]), field_names: Optional[List[str]] = None):
|
|
2346
|
+
def get_response_schema(schema: 'pa.Schema' = pa.schema([]), field_names: Optional[List[str]] = None) -> pa.Schema:
    """Build the schema made of the requested fields of ``schema``.

    :param schema: Arrow schema to pick fields from.
    :param field_names: Names of the fields to keep, in the desired order.
        When ``None``, the names of all fields in ``schema`` are used.
    :return: A new schema whose fields are looked up in ``schema`` by name.
    """
    if field_names is not None:
        wanted_names = field_names
    else:
        wanted_names = [column.name for column in schema]

    # Every field is resolved by name, including in the "all fields" case.
    picked_fields = []
    for wanted in wanted_names:
        picked_fields.append(schema.field(wanted))
    return pa.schema(picked_fields)
|
|
2366
2351
|
|
|
2367
2352
|
|
|
2368
|
-
def build_query_data_request(schema: 'pa.Schema' = pa.schema([]),
|
|
2353
|
+
def build_query_data_request(schema: 'pa.Schema' = pa.schema([]),
|
|
2354
|
+
predicate: ibis.expr.types.BooleanColumn = None,
|
|
2355
|
+
field_names: Optional[List[str]] = None) -> QueryDataRequest:
|
|
2369
2356
|
builder = flatbuffers.Builder(1024)
|
|
2370
2357
|
|
|
2371
2358
|
source_name = builder.CreateString('') # required
|
|
2372
2359
|
|
|
2373
|
-
fields = [build_field(builder, f
|
|
2360
|
+
fields = [build_field(builder, f) for f in schema]
|
|
2374
2361
|
|
|
2375
2362
|
fb_schema.StartFieldsVector(builder, len(fields))
|
|
2376
2363
|
for offset in reversed(fields):
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import TYPE_CHECKING, Iterable, Optional, Union
|
|
3
|
+
|
|
4
|
+
import ibis
|
|
5
|
+
import pyarrow as pa
|
|
6
|
+
|
|
7
|
+
from .config import ImportConfig, QueryConfig
|
|
8
|
+
from .table_metadata import TableRef
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from .table import Projection
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ITable(ABC):
    """Abstract interface for VAST Table operations.

    Every member below is abstract; a concrete table class has to implement
    all of them before it can be instantiated.
    """

    @property
    @abstractmethod
    def ref(self) -> TableRef:
        """Return the ``TableRef`` of this table."""

    @abstractmethod
    def __eq__(self, other: object) -> bool:
        """Compare this table with ``other``."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the table name."""

    @property
    @abstractmethod
    def arrow_schema(self) -> pa.Schema:
        """Return the table's Arrow schema."""

    @property
    @abstractmethod
    def path(self) -> str:
        """Return the table's path."""

    @abstractmethod
    def sorted_columns(self) -> list[str]:
        """Return the names of the sorted columns."""

    @abstractmethod
    def projection(self, name: str) -> "Projection":
        """Return the semi-sorted projection of this table called ``name``."""

    @abstractmethod
    def projections(self, projection_name: str = "") -> Iterable["Projection"]:
        """List semi-sorted projections of this table."""

    @abstractmethod
    def import_files(self, files_to_import: Iterable[str], config: Optional[ImportConfig] = None) -> None:
        """Import the given files into this table.

        :param files_to_import: Files to import, given as strings.
        :param config: Optional import configuration.
        """

    @abstractmethod
    def import_partitioned_files(self, files_and_partitions: dict[str, pa.RecordBatch], config: Optional[ImportConfig] = None) -> None:
        """Import partitioned files into this table.

        :param files_and_partitions: Mapping from a string key to a
            ``pa.RecordBatch``.
        :param config: Optional import configuration.
        """

    @abstractmethod
    def select(self,
               columns: Optional[list[str]] = None,
               predicate: Union[ibis.expr.types.BooleanColumn,
                                ibis.common.deferred.Deferred] = None,
               config: Optional[QueryConfig] = None,
               *,
               internal_row_id: bool = False,
               limit_rows: Optional[int] = None) -> pa.RecordBatchReader:
        """Execute a query against this table.

        :param columns: Optional list of column names.
        :param predicate: Optional ibis boolean column expression or
            deferred expression.
        :param config: Optional query configuration.
        :param internal_row_id: Keyword-only flag, ``False`` by default.
        :param limit_rows: Keyword-only optional row limit.
        :return: A reader over the resulting record batches.
        """

    @abstractmethod
    def insert(self, rows: Union[pa.RecordBatch, pa.Table]) -> pa.ChunkedArray:
        """Insert ``rows`` into this table."""

    @abstractmethod
    def update(self,
               rows: Union[pa.RecordBatch, pa.Table],
               columns: Optional[list[str]] = None) -> None:
        """Update rows in this table.

        :param rows: Record batch or table holding the rows to update.
        :param columns: Optional list of column names.
        """

    @abstractmethod
    def delete(self, rows: Union[pa.RecordBatch, pa.Table]) -> None:
        """Delete ``rows`` from this table."""

    @abstractmethod
    def imports_table(self) -> Optional["ITable"]:
        """Return the imports table, or ``None``."""

    @abstractmethod
    def sorting_done(self) -> bool:
        """Tell whether sorting is done."""

    @abstractmethod
    def sorting_score(self) -> int:
        """Return the sorting score."""

    @abstractmethod
    def reload_schema(self) -> None:
        """Reload the Arrow schema."""

    @abstractmethod
    def reload_stats(self) -> None:
        """Reload the table stats."""

    @abstractmethod
    def reload_sorted_columns(self) -> None:
        """Reload the sorted columns."""

    @abstractmethod
    def __getitem__(self, col_name: str) -> ibis.Column:
        """Return an ibis-like column expression for ``col_name``.

        Such expressions are useful for building the predicate passed to
        ``ITable.select()`` for predicate pushdown.
        """
|
|
@@ -113,7 +113,7 @@ def calculate_aggregate_stats(
|
|
|
113
113
|
)
|
|
114
114
|
agg_df["duration_sec"] = (
|
|
115
115
|
r_df.groupby(group_flds)
|
|
116
|
-
.apply(calc_total_time_coverage_seconds, include_groups=False)
|
|
116
|
+
.apply(calc_total_time_coverage_seconds, include_groups=False) # type: ignore
|
|
117
117
|
.sort_index()
|
|
118
118
|
)
|
|
119
119
|
agg_df["M_rows_per_sec"] = (agg_df["n_rows"] / agg_df["duration_sec"] / 1e6).astype(
|