vastdb 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/bench/test_perf.py +1 -2
- vastdb/bucket.py +12 -37
- vastdb/conftest.py +13 -4
- vastdb/errors.py +5 -1
- vastdb/internal_commands.py +26 -90
- vastdb/schema.py +45 -0
- vastdb/session.py +26 -10
- vastdb/table.py +48 -8
- vastdb/tests/test_imports.py +3 -3
- vastdb/tests/test_nested.py +77 -3
- vastdb/tests/test_projections.py +78 -0
- vastdb/tests/test_schemas.py +49 -0
- vastdb/tests/test_tables.py +123 -95
- vastdb/transaction.py +24 -15
- vastdb/util.py +1 -0
- {vastdb-0.1.4.dist-info → vastdb-0.1.6.dist-info}/METADATA +1 -1
- {vastdb-0.1.4.dist-info → vastdb-0.1.6.dist-info}/RECORD +20 -20
- {vastdb-0.1.4.dist-info → vastdb-0.1.6.dist-info}/LICENSE +0 -0
- {vastdb-0.1.4.dist-info → vastdb-0.1.6.dist-info}/WHEEL +0 -0
- {vastdb-0.1.4.dist-info → vastdb-0.1.6.dist-info}/top_level.txt +0 -0
vastdb/tests/test_nested.py
CHANGED
@@ -1,11 +1,15 @@
+import functools
 import itertools
+import operator
 
 import pyarrow as pa
+import pyarrow.compute as pc
+import pytest
 
 from .util import prepare_data
 
 
-def test_nested(session, clean_bucket_name):
+def test_nested_select(session, clean_bucket_name):
     columns = pa.schema([
         ('l', pa.list_(pa.int8())),
         ('m', pa.map_(pa.utf8(), pa.float64())),
@@ -18,11 +22,81 @@ def test_nested(session, clean_bucket_name):
     ])
 
     with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
-        actual =
+        actual = t.select().read_all()
         assert actual == expected
 
         names = [f.name for f in columns]
         for n in range(len(names) + 1):
             for cols in itertools.permutations(names, n):
-                actual =
+                actual = t.select(columns=cols).read_all()
                 assert actual == expected.select(cols)
+
+
+def test_nested_filter(session, clean_bucket_name):
+    columns = pa.schema([
+        ('x', pa.int64()),
+        ('l', pa.list_(pa.int8())),
+        ('y', pa.int64()),
+        ('m', pa.map_(pa.utf8(), pa.float64())),
+        ('z', pa.int64()),
+        ('s', pa.struct([('x', pa.int16()), ('y', pa.int32())])),
+        ('w', pa.int64()),
+    ])
+    expected = pa.table(schema=columns, data=[
+        [1, 2, 3, None],
+        [[1], [], [2, 3], None],
+        [1, 2, None, 3],
+        [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
+        [1, None, 2, 3],
+        [{'x': 1, 'y': None}, None, {'x': 2, 'y': 3}, {'x': None, 'y': 4}],
+        [None, 1, 2, 3],
+    ])
+
+    with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
+        actual = t.select().read_all()
+        assert actual == expected
+
+        names = list('xyzw')
+        for n in range(1, len(names) + 1):
+            for cols in itertools.permutations(names, n):
+                ibis_predicate = functools.reduce(
+                    operator.and_,
+                    (t[col] > 2 for col in cols))
+                actual = t.select(predicate=ibis_predicate).read_all()
+
+                arrow_predicate = functools.reduce(
+                    operator.and_,
+                    (pc.field(col) > 2 for col in cols))
+                assert actual == expected.filter(arrow_predicate)
+
+
+def test_nested_unsupported_filter(session, clean_bucket_name):
+    columns = pa.schema([
+        ('x', pa.int64()),
+        ('l', pa.list_(pa.int8())),
+        ('y', pa.int64()),
+        ('m', pa.map_(pa.utf8(), pa.float64())),
+        ('z', pa.int64()),
+        ('s', pa.struct([('x', pa.int16()), ('y', pa.int32())])),
+        ('w', pa.int64()),
+    ])
+    expected = pa.table(schema=columns, data=[
+        [1, 2, 3, None],
+        [[1], [], [2, 3], None],
+        [1, 2, None, 3],
+        [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
+        [1, None, 2, 3],
+        [{'x': 1, 'y': None}, None, {'x': 2, 'y': 3}, {'x': None, 'y': 4}],
+        [None, 1, 2, 3],
+    ])
+
+    with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
+
+        with pytest.raises(NotImplementedError):
+            list(t.select(predicate=(t['l'].isnull())))
+
+        with pytest.raises(NotImplementedError):
+            list(t.select(predicate=(t['m'].isnull())))
+
+        with pytest.raises(NotImplementedError):
+            list(t.select(predicate=(t['s'].isnull())))
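The new nested-column tests show the query pattern that recurs across this release: select() returns a record-batch reader that is drained with read_all(), and predicates are built from ibis-style column expressions and then cross-checked against an equivalent pyarrow.compute filter. Below is a minimal sketch of that pattern, assuming a table handle t obtained inside a vastdb transaction; the function name and default column names are illustrative only, not part of the package.

import functools
import operator

import pyarrow.compute as pc

def filtered_read(t, expected, cols=('x', 'y')):
    # Combine one ibis-style expression per column into a single AND predicate.
    predicate = functools.reduce(operator.and_, (t[col] > 2 for col in cols))
    actual = t.select(predicate=predicate).read_all()  # drain the reader into a pyarrow.Table

    # Re-apply the same condition client-side with pyarrow.compute to verify the result.
    arrow_predicate = functools.reduce(operator.and_, (pc.field(col) > 2 for col in cols))
    assert actual == expected.filter(arrow_predicate)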
vastdb/tests/test_projections.py
CHANGED
@@ -1,7 +1,10 @@
 import logging
+import time
 
 import pyarrow as pa
 
+from vastdb.table import QueryConfig
+
 log = logging.getLogger(__name__)
 
 
@@ -41,3 +44,78 @@ def test_basic_projections(session, clean_bucket_name):
         projs = t.projections()
         assert len(projs) == 1
         assert projs[0].name == 'p_new'
+
+
+def test_query_data_with_projection(session, clean_bucket_name):
+    columns = pa.schema([
+        ('a', pa.int64()),
+        ('b', pa.int64()),
+        ('s', pa.utf8()),
+    ])
+    # need to be large enough in order to consider as projection
+
+    GROUP_SIZE = 128 * 1024
+    expected = pa.table(schema=columns, data=[
+        [i for i in range(GROUP_SIZE)],
+        [i for i in reversed(range(GROUP_SIZE))],
+        [f's{i}' for i in range(GROUP_SIZE)],
+    ])
+
+    expected_projection_p1 = pa.table(schema=columns, data=[
+        [i for i in reversed(range(GROUP_SIZE - 5, GROUP_SIZE))],
+        [i for i in range(5)],
+        [f's{i}' for i in reversed(range(GROUP_SIZE - 5, GROUP_SIZE))],
+    ])
+
+    expected_projection_p2 = pa.table(schema=columns, data=[
+        [i for i in range(GROUP_SIZE - 5, GROUP_SIZE)],
+        [i for i in reversed(range(5))],
+        [f's{i}' for i in range(GROUP_SIZE - 5, GROUP_SIZE)],
+    ])
+
+    schema_name = "schema"
+    table_name = "table"
+    with session.transaction() as tx:
+        s = tx.bucket(clean_bucket_name).create_schema(schema_name)
+        t = s.create_table(table_name, expected.schema)
+
+        sorted_columns = ['b']
+        unsorted_columns = ['a', 's']
+        t.create_projection('p1', sorted_columns, unsorted_columns)
+
+        sorted_columns = ['a']
+        unsorted_columns = ['b', 's']
+        t.create_projection('p2', sorted_columns, unsorted_columns)
+
+    with session.transaction() as tx:
+        s = tx.bucket(clean_bucket_name).schema(schema_name)
+        t = s.table(table_name)
+        t.insert(expected)
+        actual = pa.Table.from_batches(t.select(columns=['a', 'b', 's']))
+        assert actual == expected
+
+    time.sleep(3)
+
+    with session.transaction() as tx:
+        config = QueryConfig()
+        # in nfs mock server num row groups per row block is 1 so need to change this in the config
+        config.num_row_groups_per_sub_split = 1
+
+        s = tx.bucket(clean_bucket_name).schema(schema_name)
+        t = s.table(table_name)
+        projection_actual = pa.Table.from_batches(t.select(columns=['a', 'b', 's'], predicate=(t['b'] < 5), config=config))
+        # no projection supply - need to be with p1 projeciton
+        assert expected_projection_p1 == projection_actual
+
+        config.semi_sorted_projection_name = 'p1'
+        projection_actual = pa.Table.from_batches(t.select(columns=['a', 'b', 's'], predicate=(t['b'] < 5), config=config))
+        # expecting results of projection p1 since we asked it specificaly
+        assert expected_projection_p1 == projection_actual
+
+        config.semi_sorted_projection_name = 'p2'
+        projection_actual = pa.Table.from_batches(t.select(columns=['a', 'b', 's'], predicate=(t['b'] < 5), config=config))
+        # expecting results of projection p2 since we asked it specificaly
+        assert expected_projection_p2 == projection_actual
+
+        t.drop()
+        s.drop()
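For orientation, the projection workflow driven by the new test reduces to the calls below. This is a sketch based only on what the diff shows: create_projection takes a projection name followed by sorted and unsorted column lists, and QueryConfig.semi_sorted_projection_name pins reads to a specific projection. The bucket, schema and table names here are illustrative.

import pyarrow as pa

from vastdb.table import QueryConfig

def query_via_projection(session, bucket_name):
    columns = pa.schema([('a', pa.int64()), ('b', pa.int64()), ('s', pa.utf8())])
    with session.transaction() as tx:
        schema = tx.bucket(bucket_name).create_schema('demo_schema')  # illustrative names
        table = schema.create_table('demo_table', columns)
        # Projection name, then the sorted column list and the unsorted column list.
        table.create_projection('p1', ['b'], ['a', 's'])
        table.create_projection('p2', ['a'], ['b', 's'])

    with session.transaction() as tx:
        table = tx.bucket(bucket_name).schema('demo_schema').table('demo_table')
        config = QueryConfig()
        config.semi_sorted_projection_name = 'p1'  # force the read through projection p1
        reader = table.select(columns=['a', 'b', 's'], predicate=(table['b'] < 5), config=config)
        return pa.Table.from_batches(reader)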
vastdb/tests/test_schemas.py
CHANGED
@@ -61,3 +61,52 @@ def test_list_snapshots(session, clean_bucket_name):
     with session.transaction() as tx:
         b = tx.bucket(clean_bucket_name)
         b.snapshots() # VAST Catalog may create some snapshots
+
+
+def test_nested_schemas(session, clean_bucket_name):
+    with session.transaction() as tx:
+        b = tx.bucket(clean_bucket_name)
+        s1 = b.create_schema('s1')
+        s1_s2 = s1.create_schema('s2')
+        s1_s3 = s1.create_schema('s3')
+        s1_s3_s4 = s1_s3.create_schema('s4')
+        s5 = b.create_schema('s5')
+
+        assert b.schema('s1') == s1
+        assert s1.schema('s2') == s1_s2
+        assert s1.schema('s3') == s1_s3
+        assert s1_s3.schema('s4') == s1_s3_s4
+        assert b.schema('s5') == s5
+
+        assert b.schemas() == [s1, s5]
+        assert s1.schemas() == [s1_s2, s1_s3]
+        assert s1_s2.schemas() == []
+        assert s1_s3.schemas() == [s1_s3_s4]
+        assert s1_s3_s4.schemas() == []
+        assert s5.schemas() == []
+
+        s1_s3_s4.drop()
+        assert s1_s3.schemas() == []
+        s1_s3.drop()
+        assert s1.schemas() == [s1_s2]
+        s1_s2.drop()
+        assert s1.schemas() == []
+
+        assert b.schemas() == [s1, s5]
+        s1.drop()
+        assert b.schemas() == [s5]
+        s5.drop()
+        assert b.schemas() == []
+
+
+def test_schema_pagination(session, clean_bucket_name):
+    with session.transaction() as tx:
+        b = tx.bucket(clean_bucket_name)
+        names = [f's{i}' for i in range(10)]
+        schemas = [b.create_schema(name) for name in names]
+        assert b.schemas(batch_size=3) == schemas
+
+        s0 = b.schema('s0')
+        names = [f'q{i}' for i in range(10)]
+        subschemas = [s0.create_schema(name) for name in names]
+        assert s0.schemas(batch_size=3) == subschemas
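The new schema tests rely on the nested-schema and pagination support exercised above: child schemas are created from a parent schema object and listed (optionally in pages) with schemas(batch_size=...). A compact sketch of that surface, with illustrative names, under the same session/transaction setup the tests use:

def walk_schemas(session, bucket_name):
    # Sketch of the nested-schema calls used by the tests above (names are illustrative).
    with session.transaction() as tx:
        bucket = tx.bucket(bucket_name)
        parent = bucket.create_schema('parent')
        child = parent.create_schema('child')            # child schemas hang off a parent schema
        assert parent.schema('child') == child           # lookup by name under the parent
        assert parent.schemas(batch_size=3) == [child]   # listing supports pagination
        child.drop()
        parent.drop()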
vastdb/tests/test_tables.py
CHANGED
@@ -3,10 +3,10 @@ import decimal
 import logging
 import random
 import threading
-import time
 from contextlib import closing
 from tempfile import NamedTemporaryFile
 
+import ibis
 import pyarrow as pa
 import pyarrow.compute as pc
 import pyarrow.parquet as pq
@@ -32,25 +32,25 @@ def test_tables(session, clean_bucket_name):
         ['a', 'bb', 'ccc'],
     ])
     with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
-        actual =
+        actual = t.select(columns=['a', 'b', 's']).read_all()
         assert actual == expected
 
-        actual =
+        actual = t.select().read_all()
         assert actual == expected
 
-        actual =
+        actual = t.select(columns=['a', 'b']).read_all()
         assert actual == expected.select(['a', 'b'])
 
-        actual =
+        actual = t.select(columns=['b', 's', 'a']).read_all()
         assert actual == expected.select(['b', 's', 'a'])
 
-        actual =
+        actual = t.select(columns=['s']).read_all()
         assert actual == expected.select(['s'])
 
-        actual =
+        actual = t.select(columns=[]).read_all()
         assert actual == expected.select([])
 
-        actual =
+        actual = t.select(columns=['s'], internal_row_id=True).read_all()
         log.debug("actual=%s", actual)
         assert actual.to_pydict() == {
             's': ['a', 'bb', 'ccc'],
@@ -61,9 +61,9 @@ def test_tables(session, clean_bucket_name):
         rb = pa.record_batch(schema=columns_to_delete, data=[[0]]) # delete rows 0,1
         t.delete(rb)
 
-        selected_rows =
+        selected_rows = t.select(columns=['b'], predicate=(t['a'] == 222), internal_row_id=True).read_all()
         t.delete(selected_rows)
-        actual =
+        actual = t.select(columns=['a', 'b', 's']).read_all()
         assert actual.to_pydict() == {
             'a': [333],
             'b': [2.5],
@@ -77,7 +77,7 @@ def test_insert_wide_row(session, clean_bucket_name):
     expected = pa.table(schema=columns, data=data)
 
     with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
-        actual =
+        actual = t.select().read_all()
         assert actual == expected
 
 
@@ -124,33 +124,33 @@ def test_update_table(session, clean_bucket_name):
         ])
 
         t.update(rb)
-        actual =
+        actual = t.select(columns=['a', 'b']).read_all()
         assert actual.to_pydict() == {
             'a': [1110, 222, 3330],
             'b': [0.5, 1.5, 2.5]
         }
 
-        actual =
+        actual = t.select(columns=['a', 'b'], predicate=(t['a'] < 1000), internal_row_id=True).read_all()
         column_index = actual.column_names.index('a')
         column_field = actual.field(column_index)
         new_data = pc.add(actual.column('a'), 2000)
         update_table = actual.set_column(column_index, column_field, new_data)
 
         t.update(update_table, columns=['a'])
-        actual =
+        actual = t.select(columns=['a', 'b']).read_all()
         assert actual.to_pydict() == {
             'a': [1110, 2222, 3330],
             'b': [0.5, 1.5, 2.5]
         }
 
-        actual =
+        actual = t.select(columns=['a', 'b'], predicate=(t['a'] != 2222), internal_row_id=True).read_all()
         column_index = actual.column_names.index('a')
         column_field = actual.field(column_index)
         new_data = pc.divide(actual.column('a'), 10)
         update_table = actual.set_column(column_index, column_field, new_data)
 
         t.update(update_table.to_batches()[0], columns=['a'])
-        actual =
+        actual = t.select(columns=['a', 'b']).read_all()
         assert actual.to_pydict() == {
             'a': [111, 2222, 333],
             'b': [0.5, 1.5, 2.5]
@@ -170,7 +170,7 @@ def test_select_with_multisplits(session, clean_bucket_name):
     config.rows_per_split = 1000
 
     with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
-        actual =
+        actual = t.select(columns=['a'], config=config).read_all()
         assert actual == expected
 
 
@@ -215,46 +215,47 @@ def test_types(session, clean_bucket_name):
         [dt.datetime(2024, 4, 10, 12, 34, 56, 789789), dt.datetime(2025, 4, 10, 12, 34, 56, 789789), dt.datetime(2026, 4, 10, 12, 34, 56, 789789)],
     ])
 
-    with prepare_data(session, clean_bucket_name, 's', 't', expected) as
+    with prepare_data(session, clean_bucket_name, 's', 't', expected) as table:
         def select(predicate):
-            return
+            return table.select(predicate=predicate).read_all()
 
         assert select(None) == expected
+        for t in [table, ibis._]:
+            assert select(t['tb'] == False) == expected.filter(pc.field('tb') == False) # noqa: E712
+            assert select(t['a1'] == 2) == expected.filter(pc.field('a1') == 2)
+            assert select(t['a2'] == 2000) == expected.filter(pc.field('a2') == 2000)
+            assert select(t['a4'] == 222111122) == expected.filter(pc.field('a4') == 222111122)
+            assert select(t['b'] == 1.5) == expected.filter(pc.field('b') == 1.5)
+            assert select(t['s'] == "v") == expected.filter(pc.field('s') == "v")
+            assert select(t['d'] == 231.15) == expected.filter(pc.field('d') == 231.15)
+            assert select(t['bin'] == b"\x01\x02") == expected.filter(pc.field('bin') == b"\x01\x02")
 
+            date_literal = dt.date(2024, 4, 10)
+            assert select(t['date'] == date_literal) == expected.filter(pc.field('date') == date_literal)
 
+            time_literal = dt.time(12, 34, 56)
+            assert select(t['t0'] == time_literal) == expected.filter(pc.field('t0') == time_literal)
 
+            time_literal = dt.time(12, 34, 56, 789000)
+            assert select(t['t3'] == time_literal) == expected.filter(pc.field('t3') == time_literal)
 
+            time_literal = dt.time(12, 34, 56, 789789)
+            assert select(t['t6'] == time_literal) == expected.filter(pc.field('t6') == time_literal)
 
+            time_literal = dt.time(12, 34, 56, 789789)
+            assert select(t['t9'] == time_literal) == expected.filter(pc.field('t9') == time_literal)
 
+            ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56)
+            assert select(t['ts0'] == ts_literal) == expected.filter(pc.field('ts0') == ts_literal)
 
+            ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789000)
+            assert select(t['ts3'] == ts_literal) == expected.filter(pc.field('ts3') == ts_literal)
 
+            ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789789)
+            assert select(t['ts6'] == ts_literal) == expected.filter(pc.field('ts6') == ts_literal)
 
+            ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789789)
+            assert select(t['ts9'] == ts_literal) == expected.filter(pc.field('ts9') == ts_literal)
 
 
 def test_filters(session, clean_bucket_name):
@@ -270,62 +271,70 @@ def test_filters(session, clean_bucket_name):
         ['a', 'bb', 'ccc', None, 'xyz'],
     ])
 
-    with prepare_data(session, clean_bucket_name, 's', 't', expected) as
+    with prepare_data(session, clean_bucket_name, 's', 't', expected) as table:
         def select(predicate):
-            return
+            return table.select(predicate=predicate).read_all()
 
         assert select(None) == expected
         assert select(True) == expected
         assert select(False) == pa.Table.from_batches([], schema=columns)
 
+        for t in [table, ibis._]:
+
+            select(t['a'].isin(list(range(100))))
+            select(t['a'].isin(list(range(1000))))
+            select(t['a'].isin(list(range(10000))))
+            with pytest.raises(errors.TooLargeRequest):
+                select(t['a'].isin(list(range(100000))))
 
-        assert select(t['a'] == 222) == expected.filter(pc.field('a') == 222)
-        assert select(t['a'] != 222) == expected.filter(pc.field('a') != 222)
-        assert select(t['a'] <= 222) == expected.filter(pc.field('a') <= 222)
-        assert select(t['a'] >= 222) == expected.filter(pc.field('a') >= 222)
+            assert select(t['a'].between(222, 444)) == expected.filter((pc.field('a') >= 222) & (pc.field('a') <= 444))
+            assert select((t['a'].between(222, 444)) & (t['b'] > 2.5)) == expected.filter((pc.field('a') >= 222) & (pc.field('a') <= 444) & (pc.field('b') > 2.5))
 
+            assert select(t['a'] > 222) == expected.filter(pc.field('a') > 222)
+            assert select(t['a'] < 222) == expected.filter(pc.field('a') < 222)
+            assert select(t['a'] == 222) == expected.filter(pc.field('a') == 222)
+            assert select(t['a'] != 222) == expected.filter(pc.field('a') != 222)
+            assert select(t['a'] <= 222) == expected.filter(pc.field('a') <= 222)
+            assert select(t['a'] >= 222) == expected.filter(pc.field('a') >= 222)
 
+            assert select(t['b'] > 1.5) == expected.filter(pc.field('b') > 1.5)
+            assert select(t['b'] < 1.5) == expected.filter(pc.field('b') < 1.5)
+            assert select(t['b'] == 1.5) == expected.filter(pc.field('b') == 1.5)
+            assert select(t['b'] != 1.5) == expected.filter(pc.field('b') != 1.5)
+            assert select(t['b'] <= 1.5) == expected.filter(pc.field('b') <= 1.5)
+            assert select(t['b'] >= 1.5) == expected.filter(pc.field('b') >= 1.5)
 
+            assert select(t['s'] > 'bb') == expected.filter(pc.field('s') > 'bb')
+            assert select(t['s'] < 'bb') == expected.filter(pc.field('s') < 'bb')
+            assert select(t['s'] == 'bb') == expected.filter(pc.field('s') == 'bb')
+            assert select(t['s'] != 'bb') == expected.filter(pc.field('s') != 'bb')
+            assert select(t['s'] <= 'bb') == expected.filter(pc.field('s') <= 'bb')
+            assert select(t['s'] >= 'bb') == expected.filter(pc.field('s') >= 'bb')
 
-        assert select((t['a'] > 111) | (t['b'] > 0) | (t['s'] < 'ccc')) == expected.filter((pc.field('a') > 111) | (pc.field('b') > 0) | (pc.field('s') < 'ccc'))
-        assert select((t['a'] > 111) | (t['a'] < 333) | (t['a'] == 777)) == expected.filter((pc.field('a') > 111) | (pc.field('a') < 333) | (pc.field('a') == 777))
+            assert select((t['a'] > 111) & (t['b'] > 0) & (t['s'] < 'ccc')) == expected.filter((pc.field('a') > 111) & (pc.field('b') > 0) & (pc.field('s') < 'ccc'))
+            assert select((t['a'] > 111) & (t['b'] < 2.5)) == expected.filter((pc.field('a') > 111) & (pc.field('b') < 2.5))
+            assert select((t['a'] > 111) & (t['a'] < 333)) == expected.filter((pc.field('a') > 111) & (pc.field('a') < 333))
 
+            assert select((t['a'] > 111) | (t['a'] < 333)) == expected.filter((pc.field('a') > 111) | (pc.field('a') < 333))
+            assert select(((t['a'] > 111) | (t['a'] < 333)) & (t['b'] < 2.5)) == expected.filter(((pc.field('a') > 111) | (pc.field('a') < 333)) & (pc.field('b') < 2.5))
+            with pytest.raises(NotImplementedError):
+                assert select((t['a'] > 111) | (t['b'] > 0) | (t['s'] < 'ccc')) == expected.filter((pc.field('a') > 111) | (pc.field('b') > 0) | (pc.field('s') < 'ccc'))
+                assert select((t['a'] > 111) | (t['a'] < 333) | (t['a'] == 777)) == expected.filter((pc.field('a') > 111) | (pc.field('a') < 333) | (pc.field('a') == 777))
 
+            assert select(t['s'].isnull()) == expected.filter(pc.field('s').is_null())
+            assert select((t['s'].isnull()) | (t['s'] == 'bb')) == expected.filter((pc.field('s').is_null()) | (pc.field('s') == 'bb'))
+            assert select((t['s'].isnull()) & (t['b'] == 3.5)) == expected.filter((pc.field('s').is_null()) & (pc.field('b') == 3.5))
 
+            assert select(~t['s'].isnull()) == expected.filter(~pc.field('s').is_null())
+            assert select(t['s'].contains('b')) == expected.filter(pc.field('s') == 'bb')
+            assert select(t['s'].contains('y')) == expected.filter(pc.field('s') == 'xyz')
 
-        select(t['a'].isin([]))
+            assert select(t['a'].isin([555])) == expected.filter(pc.field('a').isin([555]))
+            assert select(t['a'].isin([111, 222, 999])) == expected.filter(pc.field('a').isin([111, 222, 999]))
+            assert select((t['a'] == 111) | t['a'].isin([333, 444]) | (t['a'] > 600)) == expected.filter((pc.field('a') == 111) | pc.field('a').isin([333, 444]) | (pc.field('a') > 600))
+
+            with pytest.raises(NotImplementedError):
+                select(t['a'].isin([]))
 
 
 def test_parquet_export(session, clean_bucket_name):
@@ -348,7 +357,7 @@ def test_parquet_export(session, clean_bucket_name):
         expected = pa.Table.from_batches([rb])
         rb = t.insert(rb)
         assert rb.to_pylist() == [0, 1]
-        actual =
+        actual = t.select().read_all()
         assert actual == expected
 
         table_batches = t.select()
@@ -664,18 +673,37 @@ def test_select_stop(session, clean_bucket_name):
     assert active_threads() == 0
 
 
-def
+def test_catalog_select(session, clean_bucket_name):
     with session.transaction() as tx:
         bc = tx.catalog()
+        assert bc.columns()
+        rows = bc.select(['name']).read_all()
+        assert len(rows) > 0, rows
+
 
+class NotReady(Exception):
+    pass
 
+
+@pytest.mark.flaky(retries=30, delay=1, only_on=[NotReady])
 def test_audit_log_select(session, clean_bucket_name):
     with session.transaction() as tx:
         a = tx.audit_log()
-        a.columns()
+        assert a.columns()
+        rows = a.select().read_all()
+        if len(rows) == 0:
+            raise NotReady
+
+
+@pytest.mark.flaky(retries=30, delay=1, only_on=[NotReady])
+def test_catalog_snapshots_select(session, clean_bucket_name):
+    with session.transaction() as tx:
+        snaps = tx.catalog_snapshots()
+        if not snaps:
+            raise NotReady
+        latest = snaps[-1]
+        t = tx.catalog(latest)
+        assert t.columns()
+        rows = t.select().read_all()
+        if not rows:
+            raise NotReady