vastdb 1.4.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/_ibis_support.py +28 -0
- vastdb/_internal.py +200 -187
- vastdb/_table_interface.py +136 -0
- vastdb/bench/perf_bench/orchestrate/results_helpers.py +1 -1
- vastdb/bucket.py +1 -1
- vastdb/conftest.py +42 -19
- vastdb/schema.py +15 -3
- vastdb/session.py +3 -1
- vastdb/table.py +595 -340
- vastdb/table_metadata.py +221 -0
- vastdb/tests/test_duckdb.py +30 -30
- vastdb/tests/test_fixed_list.py +56 -6
- vastdb/tests/test_imports.py +2 -1
- vastdb/tests/test_nested.py +0 -5
- vastdb/tests/test_table_in_tx.py +249 -0
- vastdb/tests/test_tables.py +57 -11
- vastdb/tests/util.py +98 -1
- vastdb/transaction.py +27 -0
- {vastdb-1.4.0.dist-info → vastdb-2.0.1.dist-info}/METADATA +21 -6
- {vastdb-1.4.0.dist-info → vastdb-2.0.1.dist-info}/RECORD +23 -19
- {vastdb-1.4.0.dist-info → vastdb-2.0.1.dist-info}/WHEEL +1 -1
- {vastdb-1.4.0.dist-info → vastdb-2.0.1.dist-info/licenses}/LICENSE +0 -0
- {vastdb-1.4.0.dist-info → vastdb-2.0.1.dist-info}/top_level.txt +0 -0
vastdb/_ibis_support.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import ibis
|
|
2
|
+
import pyarrow as pa
|
|
3
|
+
from ibis.expr.types.structs import IbisError
|
|
4
|
+
|
|
5
|
+
from vastdb import errors
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def validate_ibis_support_schema(arrow_schema: pa.Schema):
    """Check that every field of ``arrow_schema`` can be converted by Ibis.

    Each field is converted on its own, wrapped in a single-field schema, so
    that all rejected fields are collected instead of stopping at the first.

    :param arrow_schema: Arrow schema to check.
    :raises errors.NotSupportedSchema: if at least one field is rejected by
        ``ibis.Schema.from_pyarrow``. The exception raised for the first
        rejected field is passed along as ``cause``.
    """
    # (field, exception) pairs for every rejected field, in schema order.
    failures = []
    for candidate in arrow_schema:
        try:
            ibis.Schema.from_pyarrow(pa.schema([candidate]))
        except (IbisError, ValueError, KeyError) as exc:
            failures.append((candidate, exc))

    if not failures:
        return

    # The local name is part of the rendered message (f-string "=" specifier).
    unsupported_fields = [rejected for rejected, _ in failures]
    raise errors.NotSupportedSchema(
        message=f"Ibis does not support the schema {unsupported_fields=}",
        schema=arrow_schema,
        cause=failures[0][1]
    )
|
vastdb/_internal.py
CHANGED
|
@@ -7,7 +7,7 @@ import time
|
|
|
7
7
|
import urllib.parse
|
|
8
8
|
from collections import defaultdict, namedtuple
|
|
9
9
|
from enum import Enum
|
|
10
|
-
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
|
|
10
|
+
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, cast
|
|
11
11
|
|
|
12
12
|
import backoff
|
|
13
13
|
import flatbuffers
|
|
@@ -17,6 +17,7 @@ import requests
|
|
|
17
17
|
import urllib3
|
|
18
18
|
import xmltodict
|
|
19
19
|
from aws_requests_auth.aws_auth import AWSRequestsAuth
|
|
20
|
+
from ibis.expr.operations import Field, Node
|
|
20
21
|
from ibis.expr.operations.generic import (
|
|
21
22
|
IsNull,
|
|
22
23
|
Literal,
|
|
@@ -34,7 +35,6 @@ from ibis.expr.operations.logical import (
|
|
|
34
35
|
NotEquals,
|
|
35
36
|
Or,
|
|
36
37
|
)
|
|
37
|
-
from ibis.expr.operations.relations import Field
|
|
38
38
|
from ibis.expr.operations.strings import StartsWith, StringContains
|
|
39
39
|
from ibis.expr.operations.structs import StructField
|
|
40
40
|
|
|
@@ -52,6 +52,7 @@ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int8Literal as fb_
|
|
|
52
52
|
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int16Literal as fb_int16_lit
|
|
53
53
|
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int32Literal as fb_int32_lit
|
|
54
54
|
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int64Literal as fb_int64_lit
|
|
55
|
+
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.ListLiteral as fb_list_lit
|
|
55
56
|
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Literal as fb_literal
|
|
56
57
|
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Relation as fb_relation
|
|
57
58
|
import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.RelationImpl as rel_impl
|
|
@@ -262,20 +263,32 @@ class Predicate:
|
|
|
262
263
|
node = nodes_map[name]
|
|
263
264
|
nodes_map = node.children_map
|
|
264
265
|
|
|
265
|
-
|
|
266
|
+
literal_field = node.field
|
|
267
|
+
literal_column_index = node.index
|
|
266
268
|
if node.children:
|
|
267
|
-
|
|
268
|
-
|
|
269
|
+
# Support fixed size list of a single flat child.
|
|
270
|
+
if pa.types.is_fixed_size_list(node.type) and len(node.children) == 1 and not node.children[
|
|
271
|
+
0].children:
|
|
272
|
+
# Similar to projection, the index of the column should be the leaf which is the child's.
|
|
273
|
+
literal_column_index = node.children[0].index
|
|
274
|
+
# Set the literal type to be a list rather than fixed list since fixed list is not supported.
|
|
275
|
+
# https://github.com/apache/arrow/blob/apache-arrow-7.0.0/cpp/src/arrow/compute/exec/ir_consumer.cc#L287
|
|
276
|
+
literal_field = node.field.with_type(pa.list_(node.field.type.value_field))
|
|
277
|
+
else:
|
|
278
|
+
# TODO: support predicate pushdown for leaf nodes (ORION-160338)
|
|
279
|
+
raise NotImplementedError(node.field) # no predicate pushdown for nested columns
|
|
280
|
+
|
|
281
|
+
column_offset = self.build_column(position=literal_column_index)
|
|
269
282
|
for literal in literals:
|
|
270
283
|
args_offsets = [column_offset]
|
|
271
284
|
if literal is not None:
|
|
272
|
-
args_offsets.append(self.
|
|
285
|
+
args_offsets.append(self.build_literal_expression(field=literal_field, value=literal.value))
|
|
273
286
|
if builder_func == self.build_between:
|
|
274
|
-
args_offsets.append(self.
|
|
275
|
-
args_offsets.append(self.
|
|
287
|
+
args_offsets.append(self.build_literal_expression(field=literal_field, value=lower.value))
|
|
288
|
+
args_offsets.append(self.build_literal_expression(field=literal_field, value=upper.value))
|
|
276
289
|
if builder_func == self.build_starts_with:
|
|
277
|
-
args_offsets.append(self.
|
|
278
|
-
args_offsets.append(self.
|
|
290
|
+
args_offsets.append(self.build_literal_expression(field=literal_field, value=lower_bytes))
|
|
291
|
+
args_offsets.append(self.build_literal_expression(field=literal_field, value=upper_bytes))
|
|
279
292
|
|
|
280
293
|
inner_offsets.append(builder_func(*args_offsets))
|
|
281
294
|
|
|
@@ -326,14 +339,14 @@ class Predicate:
|
|
|
326
339
|
if isinstance(filter_by_name, tuple) and len(filter_by_name) == 1:
|
|
327
340
|
op, value = self.rule_to_operator(filter_by_name[0])
|
|
328
341
|
if value:
|
|
329
|
-
literal = self.
|
|
342
|
+
literal = self.build_literal_expression(field=field, value=value)
|
|
330
343
|
return op(column, literal)
|
|
331
344
|
return op(column) # is_null or is_not_null operation
|
|
332
345
|
|
|
333
346
|
rules = []
|
|
334
347
|
for rule in filter_by_name:
|
|
335
348
|
op, value = self.rule_to_operator(rule)
|
|
336
|
-
literal = self.
|
|
349
|
+
literal = self.build_literal_expression(field=field, value=value)
|
|
337
350
|
rules.append(op(column, literal))
|
|
338
351
|
|
|
339
352
|
return self.build_and(rules)
|
|
@@ -359,145 +372,93 @@ class Predicate:
|
|
|
359
372
|
# see https://github.com/apache/arrow/blob/main/format/Schema.fbs
|
|
360
373
|
# https://github.com/apache/arrow/blob/apache-arrow-7.0.0/experimental/computeir/Expression.fbs
|
|
361
374
|
# https://github.com/apache/arrow/blob/apache-arrow-7.0.0/experimental/computeir/Literal.fbs
|
|
362
|
-
def
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
fb_int.Start(self.builder)
|
|
372
|
-
fb_int.AddBitWidth(self.builder, field.type.bit_width)
|
|
373
|
-
fb_int.AddIsSigned(self.builder, is_signed)
|
|
374
|
-
field_type = fb_int.End(self.builder)
|
|
375
|
-
|
|
375
|
+
def build_literal_impl(self, pa_type: pa.DataType, value) -> Tuple[int, int]:
|
|
376
|
+
'''
|
|
377
|
+
Builds a LiteralImpl for the given Arrow type and value.
|
|
378
|
+
:param pa_type: Literal type as defined in Arrow.
|
|
379
|
+
:param value: Value to be used in the LiteralImpl.
|
|
380
|
+
:return: Tuple[LiteralImpl, buffer_value]
|
|
381
|
+
'''
|
|
382
|
+
if pa.types.is_integer(pa_type):
|
|
383
|
+
impl_type, impl_class = None, None
|
|
376
384
|
value = int(value)
|
|
377
|
-
elif field.type.equals(pa.int32()) or field.type.equals(pa.uint32()):
|
|
378
|
-
is_signed = field.type.equals(pa.int32())
|
|
379
|
-
literal_type = fb_int32_lit if is_signed else fb_uint32_lit
|
|
380
|
-
literal_impl = LiteralImpl.Int32Literal if is_signed else LiteralImpl.UInt32Literal
|
|
381
385
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
386
|
+
if pa.types.is_int8(pa_type):
|
|
387
|
+
impl_type, impl_class = LiteralImpl.Int8Literal, fb_int8_lit
|
|
388
|
+
elif pa.types.is_uint8(pa_type):
|
|
389
|
+
impl_type, impl_class = LiteralImpl.UInt8Literal, fb_uint8_lit
|
|
390
|
+
elif pa.types.is_int16(pa_type):
|
|
391
|
+
impl_type, impl_class = LiteralImpl.Int16Literal, fb_int16_lit
|
|
392
|
+
elif pa.types.is_uint16(pa_type):
|
|
393
|
+
impl_type, impl_class = LiteralImpl.UInt16Literal, fb_uint16_lit
|
|
394
|
+
elif pa.types.is_int32(pa_type):
|
|
395
|
+
impl_type, impl_class = LiteralImpl.Int32Literal, fb_int32_lit
|
|
396
|
+
elif pa.types.is_uint32(pa_type):
|
|
397
|
+
impl_type, impl_class = LiteralImpl.UInt32Literal, fb_uint32_lit
|
|
398
|
+
elif pa.types.is_int64(pa_type):
|
|
399
|
+
impl_type, impl_class = LiteralImpl.Int64Literal, fb_int64_lit
|
|
400
|
+
elif pa.types.is_uint64(pa_type):
|
|
401
|
+
impl_type, impl_class = LiteralImpl.UInt64Literal, fb_uint64_lit
|
|
402
|
+
else:
|
|
403
|
+
raise ValueError(f'unsupported integer predicate type: {pa_type}, value={value}')
|
|
387
404
|
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
literal_impl = LiteralImpl.Int16Literal if is_signed else LiteralImpl.UInt16Literal
|
|
405
|
+
impl_class.Start(self.builder)
|
|
406
|
+
impl_class.AddValue(self.builder, value)
|
|
407
|
+
buffer_value = impl_class.End(self.builder)
|
|
408
|
+
return impl_type, buffer_value
|
|
393
409
|
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
fb_int.AddIsSigned(self.builder, is_signed)
|
|
398
|
-
field_type = fb_int.End(self.builder)
|
|
410
|
+
if pa.types.is_floating(pa_type):
|
|
411
|
+
impl_type, impl_class = None, None
|
|
412
|
+
value = float(value)
|
|
399
413
|
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
414
|
+
if pa.types.is_float32(pa_type):
|
|
415
|
+
impl_type, impl_class = LiteralImpl.Float32Literal, fb_float32_lit
|
|
416
|
+
elif pa.types.is_float64(pa_type):
|
|
417
|
+
impl_type, impl_class = LiteralImpl.Float64Literal, fb_float64_lit
|
|
418
|
+
else:
|
|
419
|
+
# Float16 is not supported by Vast.
|
|
420
|
+
raise ValueError(f'unsupported floating point predicate type: {pa_type}, value={value}')
|
|
405
421
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
field_type = fb_int.End(self.builder)
|
|
422
|
+
impl_class.Start(self.builder)
|
|
423
|
+
impl_class.AddValue(self.builder, value)
|
|
424
|
+
buffer_value = impl_class.End(self.builder)
|
|
425
|
+
return impl_type, buffer_value
|
|
411
426
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
literal_type = fb_float32_lit
|
|
415
|
-
literal_impl = LiteralImpl.Float32Literal
|
|
427
|
+
if pa_type.equals(pa.string()):
|
|
428
|
+
value = self.builder.CreateString(value)
|
|
416
429
|
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
430
|
+
fb_string_lit.Start(self.builder)
|
|
431
|
+
fb_string_lit.AddValue(self.builder, value)
|
|
432
|
+
buffer_value = fb_string_lit.End(self.builder)
|
|
433
|
+
return LiteralImpl.StringLiteral, buffer_value
|
|
421
434
|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
literal_impl = LiteralImpl.Float64Literal
|
|
435
|
+
if pa_type.equals(pa.date32()): # pa.date64() is not supported
|
|
436
|
+
# Assuming units are in Days. Look at get_field_type for more details.
|
|
437
|
+
value, = pa.array([value], pa_type).cast(pa.int32()).to_pylist()
|
|
426
438
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
439
|
+
fb_date32_lit.Start(self.builder)
|
|
440
|
+
fb_date32_lit.AddValue(self.builder, value)
|
|
441
|
+
buffer_value = fb_date32_lit.End(self.builder)
|
|
442
|
+
return LiteralImpl.DateLiteral, buffer_value
|
|
431
443
|
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
literal_type = fb_string_lit
|
|
435
|
-
literal_impl = LiteralImpl.StringLiteral
|
|
444
|
+
if pa.types.is_timestamp(pa_type):
|
|
445
|
+
value, = pa.array([value], pa_type).cast(pa.int64()).to_pylist()
|
|
436
446
|
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
447
|
+
fb_timestamp_lit.Start(self.builder)
|
|
448
|
+
fb_timestamp_lit.AddValue(self.builder, value)
|
|
449
|
+
buffer_value = fb_timestamp_lit.End(self.builder)
|
|
450
|
+
return LiteralImpl.TimestampLiteral, buffer_value
|
|
440
451
|
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
literal_impl = LiteralImpl.DateLiteral
|
|
445
|
-
|
|
446
|
-
field_type_type = Type.Date
|
|
447
|
-
fb_date.Start(self.builder)
|
|
448
|
-
fb_date.AddUnit(self.builder, DateUnit.DAY)
|
|
449
|
-
field_type = fb_date.End(self.builder)
|
|
450
|
-
value, = pa.array([value], field.type).cast(pa.int32()).to_pylist()
|
|
451
|
-
elif isinstance(field.type, pa.TimestampType):
|
|
452
|
-
literal_type = fb_timestamp_lit
|
|
453
|
-
literal_impl = LiteralImpl.TimestampLiteral
|
|
454
|
-
|
|
455
|
-
if field.type.equals(pa.timestamp('s')):
|
|
456
|
-
unit = TimeUnit.SECOND
|
|
457
|
-
if field.type.equals(pa.timestamp('ms')):
|
|
458
|
-
unit = TimeUnit.MILLISECOND
|
|
459
|
-
if field.type.equals(pa.timestamp('us')):
|
|
460
|
-
unit = TimeUnit.MICROSECOND
|
|
461
|
-
if field.type.equals(pa.timestamp('ns')):
|
|
462
|
-
unit = TimeUnit.NANOSECOND
|
|
463
|
-
|
|
464
|
-
field_type_type = Type.Timestamp
|
|
465
|
-
fb_timestamp.Start(self.builder)
|
|
466
|
-
fb_timestamp.AddUnit(self.builder, unit)
|
|
467
|
-
field_type = fb_timestamp.End(self.builder)
|
|
468
|
-
value, = pa.array([value], field.type).cast(pa.int64()).to_pylist()
|
|
469
|
-
elif isinstance(field.type, (pa.Time32Type, pa.Time64Type)):
|
|
470
|
-
literal_type = fb_time_lit
|
|
471
|
-
literal_impl = LiteralImpl.TimeLiteral
|
|
472
|
-
|
|
473
|
-
if field.type.equals(pa.time32('s')):
|
|
474
|
-
target_type = pa.int32()
|
|
475
|
-
unit = TimeUnit.SECOND
|
|
476
|
-
if field.type.equals(pa.time32('ms')):
|
|
477
|
-
target_type = pa.int32()
|
|
478
|
-
unit = TimeUnit.MILLISECOND
|
|
479
|
-
if field.type.equals(pa.time64('us')):
|
|
480
|
-
target_type = pa.int64()
|
|
481
|
-
unit = TimeUnit.MICROSECOND
|
|
482
|
-
if field.type.equals(pa.time64('ns')):
|
|
483
|
-
target_type = pa.int64()
|
|
484
|
-
unit = TimeUnit.NANOSECOND
|
|
485
|
-
|
|
486
|
-
field_type_type = Type.Time
|
|
487
|
-
fb_time.Start(self.builder)
|
|
488
|
-
fb_time.AddBitWidth(self.builder, field.type.bit_width)
|
|
489
|
-
fb_time.AddUnit(self.builder, unit)
|
|
490
|
-
field_type = fb_time.End(self.builder)
|
|
491
|
-
|
|
492
|
-
value, = pa.array([value], field.type).cast(target_type).to_pylist()
|
|
493
|
-
elif field.type.equals(pa.bool_()):
|
|
494
|
-
literal_type = fb_bool_lit
|
|
495
|
-
literal_impl = LiteralImpl.BooleanLiteral
|
|
496
|
-
|
|
497
|
-
field_type_type = Type.Bool
|
|
498
|
-
fb_bool.Start(self.builder)
|
|
499
|
-
field_type = fb_bool.End(self.builder)
|
|
452
|
+
if pa.types.is_time(pa_type):
|
|
453
|
+
target_type = pa.int32() if pa.types.is_time32(pa_type) else pa.int64()
|
|
454
|
+
value, = pa.array([value], pa_type).cast(target_type).to_pylist()
|
|
500
455
|
|
|
456
|
+
fb_time_lit.Start(self.builder)
|
|
457
|
+
fb_time_lit.AddValue(self.builder, value)
|
|
458
|
+
buffer_value = fb_time_lit.End(self.builder)
|
|
459
|
+
return LiteralImpl.TimeLiteral, buffer_value
|
|
460
|
+
|
|
461
|
+
if pa_type.equals(pa.bool_()):
|
|
501
462
|
# Handle both boolean values and string representations
|
|
502
463
|
if isinstance(value, bool):
|
|
503
464
|
value = value
|
|
@@ -505,46 +466,65 @@ class Predicate:
|
|
|
505
466
|
value = value.lower() == 'true'
|
|
506
467
|
else:
|
|
507
468
|
value = bool(value)
|
|
508
|
-
elif isinstance(field.type, pa.Decimal128Type):
|
|
509
|
-
literal_type = fb_decimal_lit
|
|
510
|
-
literal_impl = LiteralImpl.DecimalLiteral
|
|
511
|
-
|
|
512
|
-
field_type_type = Type.Decimal
|
|
513
|
-
fb_decimal.Start(self.builder)
|
|
514
|
-
fb_decimal.AddPrecision(self.builder, field.type.precision)
|
|
515
|
-
fb_decimal.AddScale(self.builder, field.type.scale)
|
|
516
|
-
field_type = fb_decimal.End(self.builder)
|
|
517
|
-
int_value = int(float(value) * 10 ** field.type.scale)
|
|
518
|
-
binary_value = int_value.to_bytes(16, 'little')
|
|
519
469
|
|
|
470
|
+
fb_bool_lit.Start(self.builder)
|
|
471
|
+
fb_bool_lit.AddValue(self.builder, value)
|
|
472
|
+
buffer_value = fb_bool_lit.End(self.builder)
|
|
473
|
+
return LiteralImpl.BooleanLiteral, buffer_value
|
|
474
|
+
|
|
475
|
+
if pa.types.is_decimal128(pa_type):
|
|
476
|
+
int_value = int(float(value) * 10 ** pa_type.scale)
|
|
477
|
+
binary_value = int_value.to_bytes(16, 'little')
|
|
520
478
|
value = self.builder.CreateByteVector(binary_value)
|
|
521
|
-
elif field.type.equals(pa.binary()):
|
|
522
|
-
literal_type = fb_binary_lit
|
|
523
|
-
literal_impl = LiteralImpl.BinaryLiteral
|
|
524
479
|
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
480
|
+
fb_decimal_lit.Start(self.builder)
|
|
481
|
+
fb_decimal_lit.AddValue(self.builder, value)
|
|
482
|
+
buffer_value = fb_decimal_lit.End(self.builder)
|
|
483
|
+
return LiteralImpl.DecimalLiteral, buffer_value
|
|
528
484
|
|
|
485
|
+
if pa_type.equals(pa.binary()):
|
|
529
486
|
value = self.builder.CreateByteVector(value)
|
|
530
|
-
else:
|
|
531
|
-
raise ValueError(f'unsupported predicate for type={field.type}, value={value}')
|
|
532
487
|
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
488
|
+
fb_binary_lit.Start(self.builder)
|
|
489
|
+
fb_binary_lit.AddValue(self.builder, value)
|
|
490
|
+
buffer_value = fb_binary_lit.End(self.builder)
|
|
491
|
+
return LiteralImpl.BinaryLiteral, buffer_value
|
|
492
|
+
|
|
493
|
+
# pa.types.is_list is False for FixedSizeList which is important since parsing of FixedSizeList is not supported
|
|
494
|
+
# https://github.com/apache/arrow/blob/apache-arrow-7.0.0/cpp/src/arrow/compute/exec/ir_consumer.cc#L287
|
|
495
|
+
if pa.types.is_list(pa_type):
|
|
496
|
+
pa_type = cast(pa.FixedSizeListType, pa_type)
|
|
497
|
+
|
|
498
|
+
buffer_literals = []
|
|
499
|
+
for element in value:
|
|
500
|
+
buffer_literals.append(self.build_literal(pa_type.value_field, element))
|
|
501
|
+
fb_list_lit.StartValuesVector(self.builder, len(buffer_literals))
|
|
502
|
+
for offset in reversed(buffer_literals):
|
|
503
|
+
self.builder.PrependUOffsetTRelative(offset)
|
|
504
|
+
values_buffer = self.builder.EndVector()
|
|
505
|
+
|
|
506
|
+
fb_list_lit.Start(self.builder)
|
|
507
|
+
fb_list_lit.AddValues(self.builder, values_buffer)
|
|
508
|
+
buffer_value = fb_list_lit.End(self.builder)
|
|
509
|
+
return LiteralImpl.ListLiteral, buffer_value
|
|
510
|
+
|
|
511
|
+
raise ValueError(f'unsupported literal type={pa_type}, value={value}')
|
|
536
512
|
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
513
|
+
def build_literal(self, field: pa.Field, value) -> int:
    """Serialize ``value`` as a flatbuffer ``Literal`` typed by ``field``.

    :param field: Arrow field whose type describes the literal.
    :param value: Value handed to ``build_literal_impl`` together with ``field.type``.
    :return: Result of ``fb_literal.End`` for the finished ``Literal`` table.
    """
    # Both nested objects are created before fb_literal.Start is called.
    impl_kind, impl_offset = self.build_literal_impl(field.type, value)

    # Literal type should not contain name for more information
    # https://github.com/apache/arrow/blob/apache-arrow-7.0.0/cpp/src/arrow/compute/exec/ir_consumer.cc#L326
    type_offset = build_field(self.builder, field, include_name=False)

    builder = self.builder
    fb_literal.Start(builder)
    fb_literal.AddImplType(builder, impl_kind)
    fb_literal.AddImpl(builder, impl_offset)
    fb_literal.AddType(builder, type_offset)
    return fb_literal.End(builder)
|
|
547
525
|
|
|
526
|
+
def build_literal_expression(self, field: pa.Field, value) -> int:
|
|
527
|
+
buffer_literal = self.build_literal(field, value)
|
|
548
528
|
fb_expression.Start(self.builder)
|
|
549
529
|
fb_expression.AddImplType(self.builder, ExpressionImpl.Literal)
|
|
550
530
|
fb_expression.AddImpl(self.builder, buffer_literal)
|
|
@@ -937,7 +917,7 @@ class VastdbApi:
|
|
|
937
917
|
"""Make sure that the connections closed."""
|
|
938
918
|
self._session.close()
|
|
939
919
|
|
|
940
|
-
def with_endpoint(self, endpoint):
|
|
920
|
+
def with_endpoint(self, endpoint) -> 'VastdbApi':
|
|
941
921
|
"""Open a new session for targeting a specific endpoint."""
|
|
942
922
|
return VastdbApi(endpoint=endpoint,
|
|
943
923
|
access_key=self.access_key,
|
|
@@ -1612,7 +1592,7 @@ class VastdbApi:
|
|
|
1612
1592
|
|
|
1613
1593
|
return headers
|
|
1614
1594
|
|
|
1615
|
-
def _build_query_data_url_params(self, projection, query_imports_table):
|
|
1595
|
+
def _build_query_data_url_params(self, projection: Optional[str], query_imports_table):
|
|
1616
1596
|
if query_imports_table and projection:
|
|
1617
1597
|
raise ValueError("Can't query both imports and projection table")
|
|
1618
1598
|
|
|
@@ -1624,8 +1604,8 @@ class VastdbApi:
|
|
|
1624
1604
|
return url_params
|
|
1625
1605
|
|
|
1626
1606
|
def query_data(self, bucket, schema, table, params, split=(0, 1, 8), num_sub_splits=1, response_row_id=False,
|
|
1627
|
-
txid=0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
|
|
1628
|
-
search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection='', enable_sorted_projections=True,
|
|
1607
|
+
txid: Optional[int] = 0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
|
|
1608
|
+
search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection: Optional[str] = '', enable_sorted_projections=True,
|
|
1629
1609
|
request_format='string', response_format='string', query_imports_table=False):
|
|
1630
1610
|
"""
|
|
1631
1611
|
GET /mybucket/myschema/mytable?data HTTP/1.1
|
|
@@ -2305,16 +2285,16 @@ def get_field_type(builder: flatbuffers.Builder, field: pa.Field):
|
|
|
2305
2285
|
return field_type, field_type_type
|
|
2306
2286
|
|
|
2307
2287
|
|
|
2308
|
-
def build_field(builder: flatbuffers.Builder, f: pa.Field,
|
|
2288
|
+
def build_field(builder: flatbuffers.Builder, f: pa.Field, include_name=True):
|
|
2309
2289
|
children = None
|
|
2310
2290
|
if isinstance(f.type, pa.StructType):
|
|
2311
|
-
children = [build_field(builder, child,
|
|
2291
|
+
children = [build_field(builder, child, include_name) for child in list(f.type)]
|
|
2312
2292
|
if pa.types.is_list(f.type) or pa.types.is_fixed_size_list(f.type):
|
|
2313
|
-
children = [build_field(builder, f.type.value_field
|
|
2293
|
+
children = [build_field(builder, f.type.value_field.with_name("item"), include_name)]
|
|
2314
2294
|
if isinstance(f.type, pa.MapType):
|
|
2315
2295
|
children = [
|
|
2316
|
-
build_field(builder, f.type.key_field
|
|
2317
|
-
build_field(builder, f.type.item_field
|
|
2296
|
+
build_field(builder, f.type.key_field.with_name("key"), include_name),
|
|
2297
|
+
build_field(builder, f.type.item_field.with_name("value"), include_name),
|
|
2318
2298
|
]
|
|
2319
2299
|
|
|
2320
2300
|
# adding "entries" column:
|
|
@@ -2340,10 +2320,15 @@ def build_field(builder: flatbuffers.Builder, f: pa.Field, name: str):
|
|
|
2340
2320
|
builder.PrependUOffsetTRelative(offset)
|
|
2341
2321
|
children = builder.EndVector()
|
|
2342
2322
|
|
|
2343
|
-
col_name =
|
|
2323
|
+
col_name = None
|
|
2324
|
+
if include_name:
|
|
2325
|
+
col_name = builder.CreateString(f.name)
|
|
2326
|
+
|
|
2344
2327
|
field_type, field_type_type = get_field_type(builder, f)
|
|
2345
2328
|
fb_field.Start(builder)
|
|
2346
|
-
|
|
2329
|
+
if col_name is not None:
|
|
2330
|
+
fb_field.AddName(builder, col_name)
|
|
2331
|
+
fb_field.AddNullable(builder, f.nullable)
|
|
2347
2332
|
fb_field.AddTypeType(builder, field_type_type)
|
|
2348
2333
|
fb_field.AddType(builder, field_type)
|
|
2349
2334
|
if children is not None:
|
|
@@ -2358,19 +2343,48 @@ class QueryDataRequest:
|
|
|
2358
2343
|
self.response_parser = response_parser
|
|
2359
2344
|
|
|
2360
2345
|
|
|
2361
|
-
def get_response_schema(schema: 'pa.Schema' = pa.schema([]), field_names: Optional[List[str]] = None):
|
|
2346
|
+
def get_response_schema(schema: 'pa.Schema' = pa.schema([]), field_names: Optional[List[str]] = None) -> pa.Schema:
    """Build a schema holding the requested fields of ``schema``, in the requested order.

    :param schema: Schema the fields are looked up in.
    :param field_names: Names of the fields to select; ``None`` selects every field of ``schema``.
    :return: A new ``pa.Schema`` made of the selected fields.
    """
    if field_names is None:
        selected_names = [column.name for column in schema]
    else:
        selected_names = field_names

    selected_fields = []
    for column_name in selected_names:
        selected_fields.append(schema.field(column_name))
    return pa.schema(selected_fields)
|
|
2366
2351
|
|
|
2367
2352
|
|
|
2368
|
-
def
|
|
2353
|
+
def _column_names_in_node_tree(expr: ibis.expr.types.Expr) -> set[str]:
    """Collect the names of all ``Field`` nodes reachable from ``expr``.

    The walk starts at ``expr.op()``. Lists and tuples are expanded into
    their items, any other object is expanded through its ``args``
    attribute when it has one, and ``Field`` nodes contribute their
    ``name`` without being expanded further.

    :param expr: Ibis expression to inspect.
    :return: Set of the field names found.
    """
    found: set[str] = set()
    # Explicit work stack; visiting order does not matter for a set result.
    pending = [expr.op()]
    while pending:
        current = pending.pop()
        if isinstance(current, Field):
            found.add(current.name)
            continue
        if isinstance(current, (list, tuple)):
            pending.extend(current)
        else:
            pending.extend(getattr(current, "args", ()))
    return found
|
|
2367
|
+
|
|
2368
|
+
|
|
2369
|
+
def build_query_data_request(schema: pa.Schema,
|
|
2370
|
+
predicate: ibis.expr.types.BooleanColumn = None,
|
|
2371
|
+
field_names: Optional[list[str]] = None) -> QueryDataRequest:
|
|
2372
|
+
if field_names is None:
|
|
2373
|
+
queried_columns = [f.name for f in schema]
|
|
2374
|
+
else:
|
|
2375
|
+
# apparently there are some tests that send a tuple despite the signature asking for a list
|
|
2376
|
+
queried_columns = list(field_names)
|
|
2377
|
+
|
|
2378
|
+
column_names_in_predicate = set() if predicate is None else _column_names_in_node_tree(predicate)
|
|
2379
|
+
column_names_required_by_predicate = column_names_in_predicate - set(queried_columns)
|
|
2380
|
+
queried_columns.extend(column_names_required_by_predicate)
|
|
2381
|
+
schema = pa.schema((schema.field(name) for name in queried_columns))
|
|
2382
|
+
|
|
2369
2383
|
builder = flatbuffers.Builder(1024)
|
|
2370
2384
|
|
|
2371
2385
|
source_name = builder.CreateString('') # required
|
|
2372
2386
|
|
|
2373
|
-
fields = [build_field(builder, f
|
|
2387
|
+
fields = [build_field(builder, f) for f in schema]
|
|
2374
2388
|
|
|
2375
2389
|
fb_schema.StartFieldsVector(builder, len(fields))
|
|
2376
2390
|
for offset in reversed(fields):
|
|
@@ -2387,18 +2401,17 @@ def build_query_data_request(schema: 'pa.Schema' = pa.schema([]), predicate: ibi
|
|
|
2387
2401
|
parser = QueryDataParser(schema)
|
|
2388
2402
|
leaves_map = {node.field.name: [leaf.index for leaf in node._iter_leaves()] for node in parser.nodes}
|
|
2389
2403
|
|
|
2390
|
-
response_schema = get_response_schema(schema, field_names)
|
|
2391
|
-
field_names = [field.name for field in response_schema]
|
|
2392
|
-
|
|
2393
2404
|
projection_fields = []
|
|
2394
|
-
for
|
|
2405
|
+
for field in schema:
|
|
2395
2406
|
# TODO: only root-level projection pushdown is supported (i.e. no support for SELECT s.x FROM t)
|
|
2396
|
-
positions = leaves_map[
|
|
2407
|
+
positions = leaves_map[field.name]
|
|
2408
|
+
|
|
2397
2409
|
for leaf_position in positions:
|
|
2398
2410
|
fb_field_index.Start(builder)
|
|
2399
2411
|
fb_field_index.AddPosition(builder, leaf_position)
|
|
2400
2412
|
offset = fb_field_index.End(builder)
|
|
2401
2413
|
projection_fields.append(offset)
|
|
2414
|
+
|
|
2402
2415
|
fb_source.StartProjectionVector(builder, len(projection_fields))
|
|
2403
2416
|
for offset in reversed(projection_fields):
|
|
2404
2417
|
builder.PrependUOffsetTRelative(offset)
|
|
@@ -2418,4 +2431,4 @@ def build_query_data_request(schema: 'pa.Schema' = pa.schema([]), predicate: ibi
|
|
|
2418
2431
|
|
|
2419
2432
|
builder.Finish(relation)
|
|
2420
2433
|
|
|
2421
|
-
return QueryDataRequest(serialized=builder.Output(), response_schema=
|
|
2434
|
+
return QueryDataRequest(serialized=builder.Output(), response_schema=schema, response_parser=QueryDataParser(schema))
|