vastdb 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,15 @@
1
+ import functools
1
2
  import itertools
3
+ import operator
2
4
 
3
5
  import pyarrow as pa
6
+ import pyarrow.compute as pc
7
+ import pytest
4
8
 
5
9
  from .util import prepare_data
6
10
 
7
11
 
8
- def test_nested(session, clean_bucket_name):
12
+ def test_nested_select(session, clean_bucket_name):
9
13
  columns = pa.schema([
10
14
  ('l', pa.list_(pa.int8())),
11
15
  ('m', pa.map_(pa.utf8(), pa.float64())),
@@ -18,11 +22,81 @@ def test_nested(session, clean_bucket_name):
18
22
  ])
19
23
 
20
24
  with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
21
- actual = pa.Table.from_batches(t.select())
25
+ actual = t.select().read_all()
22
26
  assert actual == expected
23
27
 
24
28
  names = [f.name for f in columns]
25
29
  for n in range(len(names) + 1):
26
30
  for cols in itertools.permutations(names, n):
27
- actual = pa.Table.from_batches(t.select(columns=cols))
31
+ actual = t.select(columns=cols).read_all()
28
32
  assert actual == expected.select(cols)
33
+
34
+
35
+ def test_nested_filter(session, clean_bucket_name):
36
+ columns = pa.schema([
37
+ ('x', pa.int64()),
38
+ ('l', pa.list_(pa.int8())),
39
+ ('y', pa.int64()),
40
+ ('m', pa.map_(pa.utf8(), pa.float64())),
41
+ ('z', pa.int64()),
42
+ ('s', pa.struct([('x', pa.int16()), ('y', pa.int32())])),
43
+ ('w', pa.int64()),
44
+ ])
45
+ expected = pa.table(schema=columns, data=[
46
+ [1, 2, 3, None],
47
+ [[1], [], [2, 3], None],
48
+ [1, 2, None, 3],
49
+ [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
50
+ [1, None, 2, 3],
51
+ [{'x': 1, 'y': None}, None, {'x': 2, 'y': 3}, {'x': None, 'y': 4}],
52
+ [None, 1, 2, 3],
53
+ ])
54
+
55
+ with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
56
+ actual = t.select().read_all()
57
+ assert actual == expected
58
+
59
+ names = list('xyzw')
60
+ for n in range(1, len(names) + 1):
61
+ for cols in itertools.permutations(names, n):
62
+ ibis_predicate = functools.reduce(
63
+ operator.and_,
64
+ (t[col] > 2 for col in cols))
65
+ actual = t.select(predicate=ibis_predicate).read_all()
66
+
67
+ arrow_predicate = functools.reduce(
68
+ operator.and_,
69
+ (pc.field(col) > 2 for col in cols))
70
+ assert actual == expected.filter(arrow_predicate)
71
+
72
+
73
+ def test_nested_unsupported_filter(session, clean_bucket_name):
74
+ columns = pa.schema([
75
+ ('x', pa.int64()),
76
+ ('l', pa.list_(pa.int8())),
77
+ ('y', pa.int64()),
78
+ ('m', pa.map_(pa.utf8(), pa.float64())),
79
+ ('z', pa.int64()),
80
+ ('s', pa.struct([('x', pa.int16()), ('y', pa.int32())])),
81
+ ('w', pa.int64()),
82
+ ])
83
+ expected = pa.table(schema=columns, data=[
84
+ [1, 2, 3, None],
85
+ [[1], [], [2, 3], None],
86
+ [1, 2, None, 3],
87
+ [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
88
+ [1, None, 2, 3],
89
+ [{'x': 1, 'y': None}, None, {'x': 2, 'y': 3}, {'x': None, 'y': 4}],
90
+ [None, 1, 2, 3],
91
+ ])
92
+
93
+ with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
94
+
95
+ with pytest.raises(NotImplementedError):
96
+ list(t.select(predicate=(t['l'].isnull())))
97
+
98
+ with pytest.raises(NotImplementedError):
99
+ list(t.select(predicate=(t['m'].isnull())))
100
+
101
+ with pytest.raises(NotImplementedError):
102
+ list(t.select(predicate=(t['s'].isnull())))
@@ -1,7 +1,10 @@
1
1
  import logging
2
+ import time
2
3
 
3
4
  import pyarrow as pa
4
5
 
6
+ from vastdb.table import QueryConfig
7
+
5
8
  log = logging.getLogger(__name__)
6
9
 
7
10
 
@@ -41,3 +44,78 @@ def test_basic_projections(session, clean_bucket_name):
41
44
  projs = t.projections()
42
45
  assert len(projs) == 1
43
46
  assert projs[0].name == 'p_new'
47
+
48
+
49
+ def test_query_data_with_projection(session, clean_bucket_name):
50
+ columns = pa.schema([
51
+ ('a', pa.int64()),
52
+ ('b', pa.int64()),
53
+ ('s', pa.utf8()),
54
+ ])
55
+ # needs to be large enough for the projection to be considered
56
+
57
+ GROUP_SIZE = 128 * 1024
58
+ expected = pa.table(schema=columns, data=[
59
+ [i for i in range(GROUP_SIZE)],
60
+ [i for i in reversed(range(GROUP_SIZE))],
61
+ [f's{i}' for i in range(GROUP_SIZE)],
62
+ ])
63
+
64
+ expected_projection_p1 = pa.table(schema=columns, data=[
65
+ [i for i in reversed(range(GROUP_SIZE - 5, GROUP_SIZE))],
66
+ [i for i in range(5)],
67
+ [f's{i}' for i in reversed(range(GROUP_SIZE - 5, GROUP_SIZE))],
68
+ ])
69
+
70
+ expected_projection_p2 = pa.table(schema=columns, data=[
71
+ [i for i in range(GROUP_SIZE - 5, GROUP_SIZE)],
72
+ [i for i in reversed(range(5))],
73
+ [f's{i}' for i in range(GROUP_SIZE - 5, GROUP_SIZE)],
74
+ ])
75
+
76
+ schema_name = "schema"
77
+ table_name = "table"
78
+ with session.transaction() as tx:
79
+ s = tx.bucket(clean_bucket_name).create_schema(schema_name)
80
+ t = s.create_table(table_name, expected.schema)
81
+
82
+ sorted_columns = ['b']
83
+ unsorted_columns = ['a', 's']
84
+ t.create_projection('p1', sorted_columns, unsorted_columns)
85
+
86
+ sorted_columns = ['a']
87
+ unsorted_columns = ['b', 's']
88
+ t.create_projection('p2', sorted_columns, unsorted_columns)
89
+
90
+ with session.transaction() as tx:
91
+ s = tx.bucket(clean_bucket_name).schema(schema_name)
92
+ t = s.table(table_name)
93
+ t.insert(expected)
94
+ actual = pa.Table.from_batches(t.select(columns=['a', 'b', 's']))
95
+ assert actual == expected
96
+
97
+ time.sleep(3)
98
+
99
+ with session.transaction() as tx:
100
+ config = QueryConfig()
101
+ # in the NFS mock server the number of row groups per row block is 1, so this needs to be changed in the config
102
+ config.num_row_groups_per_sub_split = 1
103
+
104
+ s = tx.bucket(clean_bucket_name).schema(schema_name)
105
+ t = s.table(table_name)
106
+ projection_actual = pa.Table.from_batches(t.select(columns=['a', 'b', 's'], predicate=(t['b'] < 5), config=config))
107
+ # no projection supplied - expecting the results of projection p1
108
+ assert expected_projection_p1 == projection_actual
109
+
110
+ config.semi_sorted_projection_name = 'p1'
111
+ projection_actual = pa.Table.from_batches(t.select(columns=['a', 'b', 's'], predicate=(t['b'] < 5), config=config))
112
+ # expecting the results of projection p1 since we asked for it specifically
113
+ assert expected_projection_p1 == projection_actual
114
+
115
+ config.semi_sorted_projection_name = 'p2'
116
+ projection_actual = pa.Table.from_batches(t.select(columns=['a', 'b', 's'], predicate=(t['b'] < 5), config=config))
117
+ # expecting the results of projection p2 since we asked for it specifically
118
+ assert expected_projection_p2 == projection_actual
119
+
120
+ t.drop()
121
+ s.drop()
@@ -61,3 +61,52 @@ def test_list_snapshots(session, clean_bucket_name):
61
61
  with session.transaction() as tx:
62
62
  b = tx.bucket(clean_bucket_name)
63
63
  b.snapshots() # VAST Catalog may create some snapshots
64
+
65
+
66
+ def test_nested_schemas(session, clean_bucket_name):
67
+ with session.transaction() as tx:
68
+ b = tx.bucket(clean_bucket_name)
69
+ s1 = b.create_schema('s1')
70
+ s1_s2 = s1.create_schema('s2')
71
+ s1_s3 = s1.create_schema('s3')
72
+ s1_s3_s4 = s1_s3.create_schema('s4')
73
+ s5 = b.create_schema('s5')
74
+
75
+ assert b.schema('s1') == s1
76
+ assert s1.schema('s2') == s1_s2
77
+ assert s1.schema('s3') == s1_s3
78
+ assert s1_s3.schema('s4') == s1_s3_s4
79
+ assert b.schema('s5') == s5
80
+
81
+ assert b.schemas() == [s1, s5]
82
+ assert s1.schemas() == [s1_s2, s1_s3]
83
+ assert s1_s2.schemas() == []
84
+ assert s1_s3.schemas() == [s1_s3_s4]
85
+ assert s1_s3_s4.schemas() == []
86
+ assert s5.schemas() == []
87
+
88
+ s1_s3_s4.drop()
89
+ assert s1_s3.schemas() == []
90
+ s1_s3.drop()
91
+ assert s1.schemas() == [s1_s2]
92
+ s1_s2.drop()
93
+ assert s1.schemas() == []
94
+
95
+ assert b.schemas() == [s1, s5]
96
+ s1.drop()
97
+ assert b.schemas() == [s5]
98
+ s5.drop()
99
+ assert b.schemas() == []
100
+
101
+
102
+ def test_schema_pagination(session, clean_bucket_name):
103
+ with session.transaction() as tx:
104
+ b = tx.bucket(clean_bucket_name)
105
+ names = [f's{i}' for i in range(10)]
106
+ schemas = [b.create_schema(name) for name in names]
107
+ assert b.schemas(batch_size=3) == schemas
108
+
109
+ s0 = b.schema('s0')
110
+ names = [f'q{i}' for i in range(10)]
111
+ subschemas = [s0.create_schema(name) for name in names]
112
+ assert s0.schemas(batch_size=3) == subschemas
@@ -3,10 +3,10 @@ import decimal
3
3
  import logging
4
4
  import random
5
5
  import threading
6
- import time
7
6
  from contextlib import closing
8
7
  from tempfile import NamedTemporaryFile
9
8
 
9
+ import ibis
10
10
  import pyarrow as pa
11
11
  import pyarrow.compute as pc
12
12
  import pyarrow.parquet as pq
@@ -32,25 +32,25 @@ def test_tables(session, clean_bucket_name):
32
32
  ['a', 'bb', 'ccc'],
33
33
  ])
34
34
  with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
35
- actual = pa.Table.from_batches(t.select(columns=['a', 'b', 's']))
35
+ actual = t.select(columns=['a', 'b', 's']).read_all()
36
36
  assert actual == expected
37
37
 
38
- actual = pa.Table.from_batches(t.select())
38
+ actual = t.select().read_all()
39
39
  assert actual == expected
40
40
 
41
- actual = pa.Table.from_batches(t.select(columns=['a', 'b']))
41
+ actual = t.select(columns=['a', 'b']).read_all()
42
42
  assert actual == expected.select(['a', 'b'])
43
43
 
44
- actual = pa.Table.from_batches(t.select(columns=['b', 's', 'a']))
44
+ actual = t.select(columns=['b', 's', 'a']).read_all()
45
45
  assert actual == expected.select(['b', 's', 'a'])
46
46
 
47
- actual = pa.Table.from_batches(t.select(columns=['s']))
47
+ actual = t.select(columns=['s']).read_all()
48
48
  assert actual == expected.select(['s'])
49
49
 
50
- actual = pa.Table.from_batches(t.select(columns=[]))
50
+ actual = t.select(columns=[]).read_all()
51
51
  assert actual == expected.select([])
52
52
 
53
- actual = pa.Table.from_batches(t.select(columns=['s'], internal_row_id=True))
53
+ actual = t.select(columns=['s'], internal_row_id=True).read_all()
54
54
  log.debug("actual=%s", actual)
55
55
  assert actual.to_pydict() == {
56
56
  's': ['a', 'bb', 'ccc'],
@@ -61,9 +61,9 @@ def test_tables(session, clean_bucket_name):
61
61
  rb = pa.record_batch(schema=columns_to_delete, data=[[0]]) # delete rows 0,1
62
62
  t.delete(rb)
63
63
 
64
- selected_rows = pa.Table.from_batches(t.select(columns=['b'], predicate=(t['a'] == 222), internal_row_id=True))
64
+ selected_rows = t.select(columns=['b'], predicate=(t['a'] == 222), internal_row_id=True).read_all()
65
65
  t.delete(selected_rows)
66
- actual = pa.Table.from_batches(t.select(columns=['a', 'b', 's']))
66
+ actual = t.select(columns=['a', 'b', 's']).read_all()
67
67
  assert actual.to_pydict() == {
68
68
  'a': [333],
69
69
  'b': [2.5],
@@ -77,7 +77,7 @@ def test_insert_wide_row(session, clean_bucket_name):
77
77
  expected = pa.table(schema=columns, data=data)
78
78
 
79
79
  with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
80
- actual = pa.Table.from_batches(t.select())
80
+ actual = t.select().read_all()
81
81
  assert actual == expected
82
82
 
83
83
 
@@ -124,33 +124,33 @@ def test_update_table(session, clean_bucket_name):
124
124
  ])
125
125
 
126
126
  t.update(rb)
127
- actual = pa.Table.from_batches(t.select(columns=['a', 'b']))
127
+ actual = t.select(columns=['a', 'b']).read_all()
128
128
  assert actual.to_pydict() == {
129
129
  'a': [1110, 222, 3330],
130
130
  'b': [0.5, 1.5, 2.5]
131
131
  }
132
132
 
133
- actual = pa.Table.from_batches(t.select(columns=['a', 'b'], predicate=(t['a'] < 1000), internal_row_id=True))
133
+ actual = t.select(columns=['a', 'b'], predicate=(t['a'] < 1000), internal_row_id=True).read_all()
134
134
  column_index = actual.column_names.index('a')
135
135
  column_field = actual.field(column_index)
136
136
  new_data = pc.add(actual.column('a'), 2000)
137
137
  update_table = actual.set_column(column_index, column_field, new_data)
138
138
 
139
139
  t.update(update_table, columns=['a'])
140
- actual = pa.Table.from_batches(t.select(columns=['a', 'b']))
140
+ actual = t.select(columns=['a', 'b']).read_all()
141
141
  assert actual.to_pydict() == {
142
142
  'a': [1110, 2222, 3330],
143
143
  'b': [0.5, 1.5, 2.5]
144
144
  }
145
145
 
146
- actual = pa.Table.from_batches(t.select(columns=['a', 'b'], predicate=(t['a'] != 2222), internal_row_id=True))
146
+ actual = t.select(columns=['a', 'b'], predicate=(t['a'] != 2222), internal_row_id=True).read_all()
147
147
  column_index = actual.column_names.index('a')
148
148
  column_field = actual.field(column_index)
149
149
  new_data = pc.divide(actual.column('a'), 10)
150
150
  update_table = actual.set_column(column_index, column_field, new_data)
151
151
 
152
152
  t.update(update_table.to_batches()[0], columns=['a'])
153
- actual = pa.Table.from_batches(t.select(columns=['a', 'b']))
153
+ actual = t.select(columns=['a', 'b']).read_all()
154
154
  assert actual.to_pydict() == {
155
155
  'a': [111, 2222, 333],
156
156
  'b': [0.5, 1.5, 2.5]
@@ -170,7 +170,7 @@ def test_select_with_multisplits(session, clean_bucket_name):
170
170
  config.rows_per_split = 1000
171
171
 
172
172
  with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
173
- actual = pa.Table.from_batches(t.select(columns=['a'], config=config))
173
+ actual = t.select(columns=['a'], config=config).read_all()
174
174
  assert actual == expected
175
175
 
176
176
 
@@ -215,46 +215,47 @@ def test_types(session, clean_bucket_name):
215
215
  [dt.datetime(2024, 4, 10, 12, 34, 56, 789789), dt.datetime(2025, 4, 10, 12, 34, 56, 789789), dt.datetime(2026, 4, 10, 12, 34, 56, 789789)],
216
216
  ])
217
217
 
218
- with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
218
+ with prepare_data(session, clean_bucket_name, 's', 't', expected) as table:
219
219
  def select(predicate):
220
- return pa.Table.from_batches(t.select(predicate=predicate))
220
+ return table.select(predicate=predicate).read_all()
221
221
 
222
222
  assert select(None) == expected
223
- assert select(t['tb'] == False) == expected.filter(pc.field('tb') == False) # noqa: E712
224
- assert select(t['a1'] == 2) == expected.filter(pc.field('a1') == 2)
225
- assert select(t['a2'] == 2000) == expected.filter(pc.field('a2') == 2000)
226
- assert select(t['a4'] == 222111122) == expected.filter(pc.field('a4') == 222111122)
227
- assert select(t['b'] == 1.5) == expected.filter(pc.field('b') == 1.5)
228
- assert select(t['s'] == "v") == expected.filter(pc.field('s') == "v")
229
- assert select(t['d'] == 231.15) == expected.filter(pc.field('d') == 231.15)
230
- assert select(t['bin'] == b"\x01\x02") == expected.filter(pc.field('bin') == b"\x01\x02")
223
+ for t in [table, ibis._]:
224
+ assert select(t['tb'] == False) == expected.filter(pc.field('tb') == False) # noqa: E712
225
+ assert select(t['a1'] == 2) == expected.filter(pc.field('a1') == 2)
226
+ assert select(t['a2'] == 2000) == expected.filter(pc.field('a2') == 2000)
227
+ assert select(t['a4'] == 222111122) == expected.filter(pc.field('a4') == 222111122)
228
+ assert select(t['b'] == 1.5) == expected.filter(pc.field('b') == 1.5)
229
+ assert select(t['s'] == "v") == expected.filter(pc.field('s') == "v")
230
+ assert select(t['d'] == 231.15) == expected.filter(pc.field('d') == 231.15)
231
+ assert select(t['bin'] == b"\x01\x02") == expected.filter(pc.field('bin') == b"\x01\x02")
231
232
 
232
- date_literal = dt.date(2024, 4, 10)
233
- assert select(t['date'] == date_literal) == expected.filter(pc.field('date') == date_literal)
233
+ date_literal = dt.date(2024, 4, 10)
234
+ assert select(t['date'] == date_literal) == expected.filter(pc.field('date') == date_literal)
234
235
 
235
- time_literal = dt.time(12, 34, 56)
236
- assert select(t['t0'] == time_literal) == expected.filter(pc.field('t0') == time_literal)
236
+ time_literal = dt.time(12, 34, 56)
237
+ assert select(t['t0'] == time_literal) == expected.filter(pc.field('t0') == time_literal)
237
238
 
238
- time_literal = dt.time(12, 34, 56, 789000)
239
- assert select(t['t3'] == time_literal) == expected.filter(pc.field('t3') == time_literal)
239
+ time_literal = dt.time(12, 34, 56, 789000)
240
+ assert select(t['t3'] == time_literal) == expected.filter(pc.field('t3') == time_literal)
240
241
 
241
- time_literal = dt.time(12, 34, 56, 789789)
242
- assert select(t['t6'] == time_literal) == expected.filter(pc.field('t6') == time_literal)
242
+ time_literal = dt.time(12, 34, 56, 789789)
243
+ assert select(t['t6'] == time_literal) == expected.filter(pc.field('t6') == time_literal)
243
244
 
244
- time_literal = dt.time(12, 34, 56, 789789)
245
- assert select(t['t9'] == time_literal) == expected.filter(pc.field('t9') == time_literal)
245
+ time_literal = dt.time(12, 34, 56, 789789)
246
+ assert select(t['t9'] == time_literal) == expected.filter(pc.field('t9') == time_literal)
246
247
 
247
- ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56)
248
- assert select(t['ts0'] == ts_literal) == expected.filter(pc.field('ts0') == ts_literal)
248
+ ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56)
249
+ assert select(t['ts0'] == ts_literal) == expected.filter(pc.field('ts0') == ts_literal)
249
250
 
250
- ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789000)
251
- assert select(t['ts3'] == ts_literal) == expected.filter(pc.field('ts3') == ts_literal)
251
+ ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789000)
252
+ assert select(t['ts3'] == ts_literal) == expected.filter(pc.field('ts3') == ts_literal)
252
253
 
253
- ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789789)
254
- assert select(t['ts6'] == ts_literal) == expected.filter(pc.field('ts6') == ts_literal)
254
+ ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789789)
255
+ assert select(t['ts6'] == ts_literal) == expected.filter(pc.field('ts6') == ts_literal)
255
256
 
256
- ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789789)
257
- assert select(t['ts9'] == ts_literal) == expected.filter(pc.field('ts9') == ts_literal)
257
+ ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789789)
258
+ assert select(t['ts9'] == ts_literal) == expected.filter(pc.field('ts9') == ts_literal)
258
259
 
259
260
 
260
261
  def test_filters(session, clean_bucket_name):
@@ -270,62 +271,70 @@ def test_filters(session, clean_bucket_name):
270
271
  ['a', 'bb', 'ccc', None, 'xyz'],
271
272
  ])
272
273
 
273
- with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
274
+ with prepare_data(session, clean_bucket_name, 's', 't', expected) as table:
274
275
  def select(predicate):
275
- return pa.Table.from_batches(t.select(predicate=predicate), t.arrow_schema)
276
+ return table.select(predicate=predicate).read_all()
276
277
 
277
278
  assert select(None) == expected
278
279
  assert select(True) == expected
279
280
  assert select(False) == pa.Table.from_batches([], schema=columns)
280
281
 
281
- assert select(t['a'].between(222, 444)) == expected.filter((pc.field('a') >= 222) & (pc.field('a') <= 444))
282
- assert select((t['a'].between(222, 444)) & (t['b'] > 2.5)) == expected.filter((pc.field('a') >= 222) & (pc.field('a') <= 444) & (pc.field('b') > 2.5))
282
+ for t in [table, ibis._]:
283
+
284
+ select(t['a'].isin(list(range(100))))
285
+ select(t['a'].isin(list(range(1000))))
286
+ select(t['a'].isin(list(range(10000))))
287
+ with pytest.raises(errors.TooLargeRequest):
288
+ select(t['a'].isin(list(range(100000))))
283
289
 
284
- assert select(t['a'] > 222) == expected.filter(pc.field('a') > 222)
285
- assert select(t['a'] < 222) == expected.filter(pc.field('a') < 222)
286
- assert select(t['a'] == 222) == expected.filter(pc.field('a') == 222)
287
- assert select(t['a'] != 222) == expected.filter(pc.field('a') != 222)
288
- assert select(t['a'] <= 222) == expected.filter(pc.field('a') <= 222)
289
- assert select(t['a'] >= 222) == expected.filter(pc.field('a') >= 222)
290
+ assert select(t['a'].between(222, 444)) == expected.filter((pc.field('a') >= 222) & (pc.field('a') <= 444))
291
+ assert select((t['a'].between(222, 444)) & (t['b'] > 2.5)) == expected.filter((pc.field('a') >= 222) & (pc.field('a') <= 444) & (pc.field('b') > 2.5))
290
292
 
291
- assert select(t['b'] > 1.5) == expected.filter(pc.field('b') > 1.5)
292
- assert select(t['b'] < 1.5) == expected.filter(pc.field('b') < 1.5)
293
- assert select(t['b'] == 1.5) == expected.filter(pc.field('b') == 1.5)
294
- assert select(t['b'] != 1.5) == expected.filter(pc.field('b') != 1.5)
295
- assert select(t['b'] <= 1.5) == expected.filter(pc.field('b') <= 1.5)
296
- assert select(t['b'] >= 1.5) == expected.filter(pc.field('b') >= 1.5)
293
+ assert select(t['a'] > 222) == expected.filter(pc.field('a') > 222)
294
+ assert select(t['a'] < 222) == expected.filter(pc.field('a') < 222)
295
+ assert select(t['a'] == 222) == expected.filter(pc.field('a') == 222)
296
+ assert select(t['a'] != 222) == expected.filter(pc.field('a') != 222)
297
+ assert select(t['a'] <= 222) == expected.filter(pc.field('a') <= 222)
298
+ assert select(t['a'] >= 222) == expected.filter(pc.field('a') >= 222)
297
299
 
298
- assert select(t['s'] > 'bb') == expected.filter(pc.field('s') > 'bb')
299
- assert select(t['s'] < 'bb') == expected.filter(pc.field('s') < 'bb')
300
- assert select(t['s'] == 'bb') == expected.filter(pc.field('s') == 'bb')
301
- assert select(t['s'] != 'bb') == expected.filter(pc.field('s') != 'bb')
302
- assert select(t['s'] <= 'bb') == expected.filter(pc.field('s') <= 'bb')
303
- assert select(t['s'] >= 'bb') == expected.filter(pc.field('s') >= 'bb')
300
+ assert select(t['b'] > 1.5) == expected.filter(pc.field('b') > 1.5)
301
+ assert select(t['b'] < 1.5) == expected.filter(pc.field('b') < 1.5)
302
+ assert select(t['b'] == 1.5) == expected.filter(pc.field('b') == 1.5)
303
+ assert select(t['b'] != 1.5) == expected.filter(pc.field('b') != 1.5)
304
+ assert select(t['b'] <= 1.5) == expected.filter(pc.field('b') <= 1.5)
305
+ assert select(t['b'] >= 1.5) == expected.filter(pc.field('b') >= 1.5)
304
306
 
305
- assert select((t['a'] > 111) & (t['b'] > 0) & (t['s'] < 'ccc')) == expected.filter((pc.field('a') > 111) & (pc.field('b') > 0) & (pc.field('s') < 'ccc'))
306
- assert select((t['a'] > 111) & (t['b'] < 2.5)) == expected.filter((pc.field('a') > 111) & (pc.field('b') < 2.5))
307
- assert select((t['a'] > 111) & (t['a'] < 333)) == expected.filter((pc.field('a') > 111) & (pc.field('a') < 333))
307
+ assert select(t['s'] > 'bb') == expected.filter(pc.field('s') > 'bb')
308
+ assert select(t['s'] < 'bb') == expected.filter(pc.field('s') < 'bb')
309
+ assert select(t['s'] == 'bb') == expected.filter(pc.field('s') == 'bb')
310
+ assert select(t['s'] != 'bb') == expected.filter(pc.field('s') != 'bb')
311
+ assert select(t['s'] <= 'bb') == expected.filter(pc.field('s') <= 'bb')
312
+ assert select(t['s'] >= 'bb') == expected.filter(pc.field('s') >= 'bb')
308
313
 
309
- assert select((t['a'] > 111) | (t['a'] < 333)) == expected.filter((pc.field('a') > 111) | (pc.field('a') < 333))
310
- assert select(((t['a'] > 111) | (t['a'] < 333)) & (t['b'] < 2.5)) == expected.filter(((pc.field('a') > 111) | (pc.field('a') < 333)) & (pc.field('b') < 2.5))
311
- with pytest.raises(NotImplementedError):
312
- assert select((t['a'] > 111) | (t['b'] > 0) | (t['s'] < 'ccc')) == expected.filter((pc.field('a') > 111) | (pc.field('b') > 0) | (pc.field('s') < 'ccc'))
313
- assert select((t['a'] > 111) | (t['a'] < 333) | (t['a'] == 777)) == expected.filter((pc.field('a') > 111) | (pc.field('a') < 333) | (pc.field('a') == 777))
314
+ assert select((t['a'] > 111) & (t['b'] > 0) & (t['s'] < 'ccc')) == expected.filter((pc.field('a') > 111) & (pc.field('b') > 0) & (pc.field('s') < 'ccc'))
315
+ assert select((t['a'] > 111) & (t['b'] < 2.5)) == expected.filter((pc.field('a') > 111) & (pc.field('b') < 2.5))
316
+ assert select((t['a'] > 111) & (t['a'] < 333)) == expected.filter((pc.field('a') > 111) & (pc.field('a') < 333))
314
317
 
315
- assert select(t['s'].isnull()) == expected.filter(pc.field('s').is_null())
316
- assert select((t['s'].isnull()) | (t['s'] == 'bb')) == expected.filter((pc.field('s').is_null()) | (pc.field('s') == 'bb'))
317
- assert select((t['s'].isnull()) & (t['b'] == 3.5)) == expected.filter((pc.field('s').is_null()) & (pc.field('b') == 3.5))
318
+ assert select((t['a'] > 111) | (t['a'] < 333)) == expected.filter((pc.field('a') > 111) | (pc.field('a') < 333))
319
+ assert select(((t['a'] > 111) | (t['a'] < 333)) & (t['b'] < 2.5)) == expected.filter(((pc.field('a') > 111) | (pc.field('a') < 333)) & (pc.field('b') < 2.5))
320
+ with pytest.raises(NotImplementedError):
321
+ assert select((t['a'] > 111) | (t['b'] > 0) | (t['s'] < 'ccc')) == expected.filter((pc.field('a') > 111) | (pc.field('b') > 0) | (pc.field('s') < 'ccc'))
322
+ assert select((t['a'] > 111) | (t['a'] < 333) | (t['a'] == 777)) == expected.filter((pc.field('a') > 111) | (pc.field('a') < 333) | (pc.field('a') == 777))
318
323
 
319
- assert select(~t['s'].isnull()) == expected.filter(~pc.field('s').is_null())
320
- assert select(t['s'].contains('b')) == expected.filter(pc.field('s') == 'bb')
321
- assert select(t['s'].contains('y')) == expected.filter(pc.field('s') == 'xyz')
324
+ assert select(t['s'].isnull()) == expected.filter(pc.field('s').is_null())
325
+ assert select((t['s'].isnull()) | (t['s'] == 'bb')) == expected.filter((pc.field('s').is_null()) | (pc.field('s') == 'bb'))
326
+ assert select((t['s'].isnull()) & (t['b'] == 3.5)) == expected.filter((pc.field('s').is_null()) & (pc.field('b') == 3.5))
322
327
 
323
- assert select(t['a'].isin([555])) == expected.filter(pc.field('a').isin([555]))
324
- assert select(t['a'].isin([111, 222, 999])) == expected.filter(pc.field('a').isin([111, 222, 999]))
325
- assert select((t['a'] == 111) | t['a'].isin([333, 444]) | (t['a'] > 600)) == expected.filter((pc.field('a') == 111) | pc.field('a').isin([333, 444]) | (pc.field('a') > 600))
328
+ assert select(~t['s'].isnull()) == expected.filter(~pc.field('s').is_null())
329
+ assert select(t['s'].contains('b')) == expected.filter(pc.field('s') == 'bb')
330
+ assert select(t['s'].contains('y')) == expected.filter(pc.field('s') == 'xyz')
326
331
 
327
- with pytest.raises(NotImplementedError):
328
- select(t['a'].isin([]))
332
+ assert select(t['a'].isin([555])) == expected.filter(pc.field('a').isin([555]))
333
+ assert select(t['a'].isin([111, 222, 999])) == expected.filter(pc.field('a').isin([111, 222, 999]))
334
+ assert select((t['a'] == 111) | t['a'].isin([333, 444]) | (t['a'] > 600)) == expected.filter((pc.field('a') == 111) | pc.field('a').isin([333, 444]) | (pc.field('a') > 600))
335
+
336
+ with pytest.raises(NotImplementedError):
337
+ select(t['a'].isin([]))
329
338
 
330
339
 
331
340
  def test_parquet_export(session, clean_bucket_name):
@@ -348,7 +357,7 @@ def test_parquet_export(session, clean_bucket_name):
348
357
  expected = pa.Table.from_batches([rb])
349
358
  rb = t.insert(rb)
350
359
  assert rb.to_pylist() == [0, 1]
351
- actual = pa.Table.from_batches(t.select())
360
+ actual = t.select().read_all()
352
361
  assert actual == expected
353
362
 
354
363
  table_batches = t.select()
@@ -664,18 +673,37 @@ def test_select_stop(session, clean_bucket_name):
664
673
  assert active_threads() == 0
665
674
 
666
675
 
667
- def test_big_catalog_select(session, clean_bucket_name):
676
+ def test_catalog_select(session, clean_bucket_name):
668
677
  with session.transaction() as tx:
669
678
  bc = tx.catalog()
670
- actual = pa.Table.from_batches(bc.select(['name']))
671
- assert actual
672
- log.info("actual=%s", actual)
679
+ assert bc.columns()
680
+ rows = bc.select(['name']).read_all()
681
+ assert len(rows) > 0, rows
682
+
673
683
 
684
+ class NotReady(Exception):
685
+ pass
674
686
 
687
+
688
+ @pytest.mark.flaky(retries=30, delay=1, only_on=[NotReady])
675
689
  def test_audit_log_select(session, clean_bucket_name):
676
690
  with session.transaction() as tx:
677
691
  a = tx.audit_log()
678
- a.columns()
679
- time.sleep(1)
680
- actual = pa.Table.from_batches(a.select(), a.arrow_schema)
681
- log.info("actual=%s", actual)
692
+ assert a.columns()
693
+ rows = a.select().read_all()
694
+ if len(rows) == 0:
695
+ raise NotReady
696
+
697
+
698
+ @pytest.mark.flaky(retries=30, delay=1, only_on=[NotReady])
699
+ def test_catalog_snapshots_select(session, clean_bucket_name):
700
+ with session.transaction() as tx:
701
+ snaps = tx.catalog_snapshots()
702
+ if not snaps:
703
+ raise NotReady
704
+ latest = snaps[-1]
705
+ t = tx.catalog(latest)
706
+ assert t.columns()
707
+ rows = t.select().read_all()
708
+ if not rows:
709
+ raise NotReady