vastdb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/__init__.py +6 -2
- vastdb/bench/__init__.py +0 -0
- vastdb/bench/test_perf.py +29 -0
- vastdb/bucket.py +21 -9
- vastdb/{tests/conftest.py → conftest.py} +21 -7
- vastdb/errors.py +32 -9
- vastdb/internal_commands.py +236 -278
- vastdb/schema.py +22 -9
- vastdb/session.py +2 -3
- vastdb/table.py +57 -57
- vastdb/tests/test_duckdb.py +61 -0
- vastdb/tests/test_imports.py +3 -5
- vastdb/tests/test_nested.py +28 -0
- vastdb/tests/test_projections.py +3 -1
- vastdb/tests/test_sanity.py +5 -6
- vastdb/tests/test_schemas.py +20 -1
- vastdb/tests/test_tables.py +108 -76
- vastdb/tests/util.py +15 -0
- vastdb/transaction.py +18 -9
- vastdb/util.py +6 -4
- {vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/METADATA +1 -4
- {vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/RECORD +25 -20
- {vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/WHEEL +1 -1
- {vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/LICENSE +0 -0
- {vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/top_level.txt +0 -0
vastdb/tests/test_tables.py
CHANGED
@@ -1,41 +1,24 @@
-import
-import
-import
+import datetime as dt
+import decimal
+import logging
 import random
+import threading
+from contextlib import closing
+from tempfile import NamedTemporaryFile
+
 import pyarrow as pa
 import pyarrow.compute as pc
 import pyarrow.parquet as pq
-import
-import datetime as dt
-
-from tempfile import NamedTemporaryFile
-from contextlib import contextmanager, closing
-
+import pytest
 from requests.exceptions import HTTPError
-import logging

-from ..table import INTERNAL_ROW_ID, QueryConfig
 from .. import errors
-
+from ..table import INTERNAL_ROW_ID, QueryConfig
+from .util import prepare_data

 log = logging.getLogger(__name__)


-@contextmanager
-def prepare_data(session, clean_bucket_name, schema_name, table_name, arrow_table):
-    with session.transaction() as tx:
-        s = tx.bucket(clean_bucket_name).create_schema(schema_name)
-        t = s.create_table(table_name, arrow_table.schema)
-        row_ids_array = t.insert(arrow_table)
-        row_ids = row_ids_array.to_pylist()
-        log.debug("row_ids=%s" % row_ids)
-        assert row_ids == list(range(arrow_table.num_rows))
-        yield t
-        t.drop()
-        s.drop()
-
-log = logging.getLogger(__name__)
-
 def test_tables(session, clean_bucket_name):
     columns = pa.schema([
         ('a', pa.int64()),
@@ -86,6 +69,27 @@ def test_tables(session, clean_bucket_name):
         's': ['ccc']
     }

+
+def test_exists(session, clean_bucket_name):
+    with session.transaction() as tx:
+        s = tx.bucket(clean_bucket_name).create_schema('s1')
+        assert s.tables() == []
+
+        t = s.create_table('t', pa.schema([('x', pa.int64())]))
+
+        assert s.tables() == [t]
+        with pytest.raises(errors.TableExists):
+            s.create_table('t', pa.schema([('x', pa.int64())]))
+
+        assert s.tables() == [t]
+        assert s.create_table('t', pa.schema([('x', pa.int64())]), fail_if_exists=False) == t
+        assert s.tables() == [t]
+        assert s.create_table('t', pa.schema([('y', pa.int64())]), fail_if_exists=False) == t
+        assert s.tables() == [t]
+        assert s.create_table('t', pa.schema([('x', pa.int64())]), fail_if_exists=False) == t
+        assert s.tables() == [t]
+
+
 def test_update_table(session, clean_bucket_name):
     columns = pa.schema([
         ('a', pa.int64()),
@@ -141,12 +145,13 @@ def test_update_table(session, clean_bucket_name):
         'b': [0.5, 1.5, 2.5]
     }

+
 def test_select_with_multisplits(session, clean_bucket_name):
     columns = pa.schema([
         ('a', pa.int32())
     ])

-    data = [
+    data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
     data = data * 1000
     expected = pa.table(schema=columns, data=[data])

@@ -169,21 +174,36 @@ def test_types(session, clean_bucket_name):
         ('d', pa.decimal128(7, 3)),
         ('bin', pa.binary()),
         ('date', pa.date32()),
-        ('
+        ('t0', pa.time32('s')),
+        ('t3', pa.time32('ms')),
+        ('t6', pa.time64('us')),
+        ('t9', pa.time64('ns')),
+        ('ts0', pa.timestamp('s')),
+        ('ts3', pa.timestamp('ms')),
+        ('ts6', pa.timestamp('us')),
+        ('ts9', pa.timestamp('ns')),
     ])

     expected = pa.table(schema=columns, data=[
         [True, True, False],
-        [1
+        [1, 2, 4],
         [1999, 2000, 2001],
         [11122221, 222111122, 333333],
         [0.5, 1.5, 2.5],
         ["a", "v", "s"],
         [decimal.Decimal('110.52'), decimal.Decimal('231.15'), decimal.Decimal('3332.44')],
         [b"\x01\x02", b"\x01\x05", b"\x01\x07"],
-        [dt.
-        [dt.
+        [dt.date(2024, 4, 10), dt.date(2024, 4, 11), dt.date(2024, 4, 12)],
+        [dt.time(12, 34, 56), dt.time(12, 34, 57), dt.time(12, 34, 58)],
+        [dt.time(12, 34, 56, 789000), dt.time(12, 34, 57, 789000), dt.time(12, 34, 58, 789000)],
+        [dt.time(12, 34, 56, 789789), dt.time(12, 34, 57, 789789), dt.time(12, 34, 58, 789789)],
+        [dt.time(12, 34, 56, 789789), dt.time(12, 34, 57, 789789), dt.time(12, 34, 58, 789789)],
+        [dt.datetime(2024, 4, 10, 12, 34, 56), dt.datetime(2025, 4, 10, 12, 34, 56), dt.datetime(2026, 4, 10, 12, 34, 56)],
+        [dt.datetime(2024, 4, 10, 12, 34, 56, 789000), dt.datetime(2025, 4, 10, 12, 34, 56, 789000), dt.datetime(2026, 4, 10, 12, 34, 56, 789000)],
+        [dt.datetime(2024, 4, 10, 12, 34, 56, 789789), dt.datetime(2025, 4, 10, 12, 34, 56, 789789), dt.datetime(2026, 4, 10, 12, 34, 56, 789789)],
+        [dt.datetime(2024, 4, 10, 12, 34, 56, 789789), dt.datetime(2025, 4, 10, 12, 34, 56, 789789), dt.datetime(2026, 4, 10, 12, 34, 56, 789789)],
     ])
+
     with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
         def select(predicate):
             return pa.Table.from_batches(t.select(predicate=predicate))
@@ -197,7 +217,33 @@ def test_types(session, clean_bucket_name):
         assert select(t['s'] == "v") == expected.filter(pc.field('s') == "v")
         assert select(t['d'] == 231.15) == expected.filter(pc.field('d') == 231.15)
         assert select(t['bin'] == b"\x01\x02") == expected.filter(pc.field('bin') == b"\x01\x02")
-
+
+        date_literal = dt.date(2024, 4, 10)
+        assert select(t['date'] == date_literal) == expected.filter(pc.field('date') == date_literal)
+
+        time_literal = dt.time(12, 34, 56)
+        assert select(t['t0'] == time_literal) == expected.filter(pc.field('t0') == time_literal)
+
+        time_literal = dt.time(12, 34, 56, 789000)
+        assert select(t['t3'] == time_literal) == expected.filter(pc.field('t3') == time_literal)
+
+        time_literal = dt.time(12, 34, 56, 789789)
+        assert select(t['t6'] == time_literal) == expected.filter(pc.field('t6') == time_literal)
+
+        time_literal = dt.time(12, 34, 56, 789789)
+        assert select(t['t9'] == time_literal) == expected.filter(pc.field('t9') == time_literal)
+
+        ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56)
+        assert select(t['ts0'] == ts_literal) == expected.filter(pc.field('ts0') == ts_literal)
+
+        ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789000)
+        assert select(t['ts3'] == ts_literal) == expected.filter(pc.field('ts3') == ts_literal)
+
+        ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789789)
+        assert select(t['ts6'] == ts_literal) == expected.filter(pc.field('ts6') == ts_literal)
+
+        ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789789)
+        assert select(t['ts9'] == ts_literal) == expected.filter(pc.field('ts9') == ts_literal)


 def test_filters(session, clean_bucket_name):
@@ -248,7 +294,7 @@ def test_filters(session, clean_bucket_name):
         assert select(((t['a'] > 111) | (t['a'] < 333)) & (t['b'] < 2.5)) == expected.filter(((pc.field('a') > 111) | (pc.field('a') < 333)) & (pc.field('b') < 2.5))
         with pytest.raises(NotImplementedError):
             assert select((t['a'] > 111) | (t['b'] > 0) | (t['s'] < 'ccc')) == expected.filter((pc.field('a') > 111) | (pc.field('b') > 0) | (pc.field('s') < 'ccc'))
-        assert select((t['a'] > 111) | (t['a'] < 333) | (t['a'] == 777)
+        assert select((t['a'] > 111) | (t['a'] < 333) | (t['a'] == 777)) == expected.filter((pc.field('a') > 111) | (pc.field('a') < 333) | (pc.field('a') == 777))

         assert select(t['s'].isnull()) == expected.filter(pc.field('s').is_null())
         assert select((t['s'].isnull()) | (t['s'] == 'bb')) == expected.filter((pc.field('s').is_null()) | (pc.field('s') == 'bb'))
@@ -259,26 +305,6 @@ def test_filters(session, clean_bucket_name):
         assert select(t['s'].contains('y')) == expected.filter(pc.field('s') == 'xyz')


-def test_duckdb(session, clean_bucket_name):
-    columns = pa.schema([
-        ('a', pa.int32()),
-        ('b', pa.float64()),
-    ])
-    data = pa.table(schema=columns, data=[
-        [111, 222, 333],
-        [0.5, 1.5, 2.5],
-    ])
-    with prepare_data(session, clean_bucket_name, 's', 't', data) as t:
-        conn = duckdb.connect()
-        batches = t.select(columns=['a'], predicate=(t['b'] < 2))  # noqa: F841
-        actual = conn.execute('SELECT max(a) as "a_max" FROM batches').arrow()
-        expected = (data
-            .filter(pc.field('b') < 2)
-            .group_by([])
-            .aggregate([('a', 'max')]))
-        assert actual == expected
-
-
 def test_parquet_export(session, clean_bucket_name):
     with session.transaction() as tx:
         s = tx.bucket(clean_bucket_name).create_schema('s1')
@@ -297,8 +323,7 @@ def test_parquet_export(session, clean_bucket_name):
             ['a', 'b'],
         ])
         expected = pa.Table.from_batches([rb])
-
-        assert rb.to_pylist() == [0, 1]
+        t.insert(rb)
         actual = pa.Table.from_batches(t.select())
         assert actual == expected
@@ -312,6 +337,7 @@ def test_parquet_export(session, clean_bucket_name):

         assert expected == pq.read_table(parquet_file.name)

+
 def test_errors(session, clean_bucket_name):
     with pytest.raises(errors.MissingSchema):
         with session.transaction() as tx:
@@ -331,7 +357,8 @@ def test_errors(session, clean_bucket_name):
             ('s', pa.utf8()),
         ])
         s.create_table('t1', columns)
-        s.drop()
+        s.drop()  # cannot drop schema without dropping its tables first
+

 def test_rename_schema(session, clean_bucket_name):
@@ -389,20 +416,21 @@ def test_rename_table(session, clean_bucket_name):
             s.table('t')
         t = s.table('t2')

-        #assert that other transactions are isolated
+        # assert that other transactions are isolated
         with pytest.raises(errors.MissingTable):
             tx2.bucket(clean_bucket_name).schema('s').table('t2')
         tx2.bucket(clean_bucket_name).schema('s').table('t')

     with session.transaction() as tx:
         s = tx.bucket(clean_bucket_name).schema('s')
-        #assert that new transactions see the change
+        # assert that new transactions see the change
         with pytest.raises(errors.MissingTable):
             s.table('t')
         t = s.table('t2')
         t.drop()
         s.drop()

+
 def test_add_column(session, clean_bucket_name):
     columns = pa.schema([
         ('a', pa.int16()),
@@ -425,18 +453,18 @@ def test_add_column(session, clean_bucket_name):
         # in which it was added
         assert t.arrow_schema == new_schema

-        #assert that other transactions are isolated
+        # assert that other transactions are isolated
         assert tx2.bucket(clean_bucket_name).schema('s').table('t').arrow_schema == columns

-
     with session.transaction() as tx:
         s = tx.bucket(clean_bucket_name).schema('s')
         t = s.table('t')
-        #assert that new transactions see the change
+        # assert that new transactions see the change
         assert t.arrow_schema == new_schema
         t.drop()
         s.drop()

+
 def test_drop_column(session, clean_bucket_name):
     columns = pa.schema([
         ('a', pa.int16()),
@@ -460,31 +488,32 @@ def test_drop_column(session, clean_bucket_name):
         # in which it was added
         assert t.arrow_schema == new_schema

-        #assert that other transactions are isolated
+        # assert that other transactions are isolated
         assert tx2.bucket(clean_bucket_name).schema('s').table('t').arrow_schema == columns

-
     with session.transaction() as tx:
         s = tx.bucket(clean_bucket_name).schema('s')
         t = s.table('t')
-        #assert that new transactions see the change
+        # assert that new transactions see the change
         assert t.arrow_schema == new_schema
         t.drop()
         s.drop()

+
 def test_rename_column(session, clean_bucket_name):
     columns = pa.schema([
         ('a', pa.int16()),
         ('b', pa.float32()),
         ('s', pa.utf8()),
     ])
-
+
+    def prepare_rename_column(schema: pa.Schema, old_name: str, new_name: str) -> pa.Schema:
         field_idx = schema.get_field_index(old_name)
         column_to_rename = schema.field(field_idx)
         renamed_column = column_to_rename.with_name(new_name)
         return schema.set(field_idx, renamed_column)

-    new_schema = prepare_rename_column(columns,'a','aaa')
+    new_schema = prepare_rename_column(columns, 'a', 'aaa')

     with session.transaction() as tx:
         s = tx.bucket(clean_bucket_name).create_schema('s')
@@ -499,10 +528,10 @@ def test_rename_column(session, clean_bucket_name):
         # in which it was added
         assert t.arrow_schema == new_schema

-        #assert that other transactions are isolated
+        # assert that other transactions are isolated
         assert tx2.bucket(clean_bucket_name).schema('s').table('t').arrow_schema == columns

-    #assert that new transactions see the change
+    # assert that new transactions see the change
     with session.transaction() as tx:
         s = tx.bucket(clean_bucket_name).schema('s')
         t = s.table('t')
@@ -517,7 +546,7 @@ def test_rename_column(session, clean_bucket_name):
         t1 = tx1.bucket(clean_bucket_name).schema('s').table('t')
         t2 = tx2.bucket(clean_bucket_name).schema('s').table('t')
         t1.rename_column('b', 'bb')
-        with pytest.raises(HTTPError, match
+        with pytest.raises(HTTPError, match='409 Client Error: Conflict'):
             t2.rename_column('b', 'bbb')

     with session.transaction() as tx:
@@ -533,6 +562,7 @@ def test_rename_column(session, clean_bucket_name):
         t.drop()
         s.drop()

+
 def test_select_stop(session, clean_bucket_name):
     columns = pa.schema([
         ('a', pa.uint8()),
@@ -555,15 +585,16 @@ def test_select_stop(session, clean_bucket_name):
     qc = QueryConfig(num_sub_splits=2, num_splits=4, num_row_groups_per_sub_split=1)
     with session.transaction() as tx:
         t = tx.bucket(clean_bucket_name).schema('s').table('t')
-        t.
-        qc.data_endpoints = list(t.stats.endpoints) * 2
+        qc.data_endpoints = list(t.get_stats().endpoints) * 2

     # Duplicate the table until it is large enough to generate enough batches
     while num_rows < (qc.num_sub_splits * qc.num_splits) * ROWS_PER_GROUP:
+        # We need two separate transactions to prevent an infinite loop that may happen
+        # while appending and reading the same table using a single transaction.
        with session.transaction() as tx_read, session.transaction() as tx_write:
            t_read = tx_read.bucket(clean_bucket_name).schema('s').table('t')
            t_write = tx_write.bucket(clean_bucket_name).schema('s').table('t')
-            for batch in t_read.select(['a'],config=qc):
+            for batch in t_read.select(['a'], config=qc):
                t_write.insert(batch)
        num_rows = num_rows * 2
        log.info("Num rows: %d", num_rows)
@@ -580,11 +611,12 @@ def test_select_stop(session, clean_bucket_name):
     # If this assert triggers it just means that the test assumptions about how
     # the tabular server splits the batches is not true anymore and we need to
     # rewrite the test.
-    assert read_batches == qc.num_splits*qc.num_sub_splits
-    qc.query_id = str(random.randint(0,2**32))
+    assert read_batches == qc.num_splits * qc.num_sub_splits
+    qc.query_id = str(random.randint(0, 2**32))
     log.info("query id is: %s", qc.query_id)
+
     def active_threads():
-        log.debug("%s",[t.getName() for t in threading.enumerate() if t.is_alive()])
+        log.debug("%s", [t.getName() for t in threading.enumerate() if t.is_alive()])
         return sum([1 if t.is_alive() and qc.query_id in t.getName() else 0 for t in threading.enumerate()])

     assert active_threads() == 0
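
Note: `prepare_data` moved into the shared helper module `vastdb/tests/util.py` (see the ADDED file below), and the new `test_exists` pins down the `fail_if_exists` semantics of `Schema.create_table`: re-creating an existing table with `fail_if_exists=False` returns the existing table (even when a different Arrow schema is requested) instead of raising `errors.TableExists`. A minimal sketch of that idiom outside the test suite, assuming a `session` object like the pytest fixture used above and a hypothetical bucket name:

    import pyarrow as pa

    def ensure_table(session, bucket_name='my-bucket'):
        columns = pa.schema([('x', pa.int64())])
        with session.transaction() as tx:
            s = tx.bucket(bucket_name).create_schema('s1')
            t = s.create_table('t', columns)  # first call creates the table
            # Re-creating with fail_if_exists=False is idempotent: the existing
            # table is returned instead of raising errors.TableExists.
            assert s.create_table('t', columns, fail_if_exists=False) == t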
vastdb/tests/util.py
ADDED
@@ -0,0 +1,15 @@
+import logging
+from contextlib import contextmanager
+
+log = logging.getLogger(__name__)
+
+
+@contextmanager
+def prepare_data(session, clean_bucket_name, schema_name, table_name, arrow_table):
+    with session.transaction() as tx:
+        s = tx.bucket(clean_bucket_name).create_schema(schema_name)
+        t = s.create_table(table_name, arrow_table.schema)
+        t.insert(arrow_table)
+        yield t
+        t.drop()
+        s.drop()
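
This helper is what the rewritten tests above import via `from .util import prepare_data`: it creates the schema and table, inserts the given Arrow table, yields the `Table`, and drops both on exit, all inside a single transaction. A typical call, mirroring its use in test_tables.py (`session` and `clean_bucket_name` are the pytest fixtures from conftest.py):

    import pyarrow as pa
    import pyarrow.compute as pc

    from vastdb.tests.util import prepare_data

    def test_example(session, clean_bucket_name):
        expected = pa.table(schema=pa.schema([('a', pa.int32())]), data=[[1, 2, 3]])
        with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
            # Predicates are pushed down to the server; results arrive as Arrow batches.
            actual = pa.Table.from_batches(t.select(predicate=(t['a'] > 1)))
            assert actual == expected.filter(pc.field('a') > 1)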
vastdb/transaction.py
CHANGED
@@ -6,22 +6,23 @@ A transcation is used as a context manager, since every Database-related operati
     tx.bucket("bucket").create_schema("schema")
 """

-
+import logging
+from dataclasses import dataclass
+from typing import Optional

 import botocore

-from
-import logging
-
+from . import bucket, errors, schema, session, table

 log = logging.getLogger(__name__)

+
 @dataclass
 class Transaction:
     """A holder of a single VAST transaction."""

     _rpc: "session.Session"
-    txid: int = None
+    txid: Optional[int] = None

     def __enter__(self):
         """Create a transaction and store its ID."""
@@ -32,12 +33,14 @@ class Transaction:

     def __exit__(self, exc_type, exc_value, exc_traceback):
         """On success, the transaction is committed. Otherwise, it is rolled back."""
+        txid = self.txid
+        self.txid = None
         if (exc_type, exc_value, exc_traceback) == (None, None, None):
-            log.debug("committing txid=%016x",
-            self._rpc.api.commit_transaction(
+            log.debug("committing txid=%016x", txid)
+            self._rpc.api.commit_transaction(txid)
         else:
-            log.exception("rolling back txid=%016x due to:",
-            self._rpc.api.rollback_transaction(
+            log.exception("rolling back txid=%016x due to:", txid)
+            self._rpc.api.rollback_transaction(txid)

     def __repr__(self):
         """Don't show the session details."""
@@ -53,3 +56,9 @@ class Transaction:
                 raise errors.MissingBucket(name)
             raise
         return bucket.Bucket(name, self)
+
+    def catalog(self, fail_if_missing=True) -> Optional["table.Table"]:
+        """Return VAST Catalog table."""
+        b = bucket.Bucket("vast-big-catalog-bucket", self)
+        s = schema.Schema("vast_big_catalog_schema", b)
+        return s.table(name="vast_big_catalog_table", fail_if_missing=fail_if_missing)
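
Besides the import cleanup and the `Optional[int]` annotation fix, 0.1.2 now clears `self.txid` before committing or rolling back, and adds a `catalog()` accessor for the well-known VAST Catalog table (bucket `vast-big-catalog-bucket`, schema `vast_big_catalog_schema`, table `vast_big_catalog_table`). A short usage sketch, assuming an existing `session` and that `fail_if_missing=False` yields `None` when the catalog is absent (as the `Optional` return type suggests):

    import pyarrow as pa

    with session.transaction() as tx:
        cat = tx.catalog(fail_if_missing=False)
        if cat is not None:
            # The catalog behaves like any other table: select() returns Arrow batches.
            rows = pa.Table.from_batches(cat.select())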
vastdb/util.py
CHANGED
@@ -1,18 +1,20 @@
 import logging
-from typing import Callable
+from typing import Callable, List, Optional

 import pyarrow as pa
 import pyarrow.parquet as pq

 from .errors import InvalidArgument
 from .schema import Schema
-from .table import Table
+from .table import ImportConfig, Table

 log = logging.getLogger(__name__)


 def create_table_from_files(
-        schema: Schema, table_name: str, parquet_files: [str],
+        schema: Schema, table_name: str, parquet_files: List[str],
+        schema_merge_func: Optional[Callable] = None,
+        config: Optional[ImportConfig] = None) -> Table:
     if not schema_merge_func:
         schema_merge_func = default_schema_merge
     else:
@@ -32,7 +34,7 @@ def create_table_from_files(
     table = schema.create_table(table_name, current_schema)

     log.info("Starting import of %d files to table: %s", len(parquet_files), table)
-    table.import_files(parquet_files)
+    table.import_files(parquet_files, config=config)
     log.info("Finished import of %d files to table: %s", len(parquet_files), table)
     return table

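
The widened signature lets callers pass an explicit schema-merge policy and an `ImportConfig` that is forwarded verbatim to `Table.import_files`. A sketch of a call under stated assumptions (the Parquet paths are illustrative, and `ImportConfig()` is assumed to be constructible with defaults):

    from vastdb.table import ImportConfig
    from vastdb.util import create_table_from_files

    with session.transaction() as tx:
        s = tx.bucket('my-bucket').schema('s')
        # schema_merge_func defaults to default_schema_merge when omitted;
        # config is passed straight through to Table.import_files.
        table = create_table_from_files(
            s, 't_imported',
            parquet_files=['/my-bucket/data/part-0.parquet', '/my-bucket/data/part-1.parquet'],
            config=ImportConfig())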
{vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/METADATA
CHANGED
@@ -1,12 +1,11 @@
 Metadata-Version: 2.1
 Name: vastdb
-Version: 0.1.0
+Version: 0.1.2
 Summary: VAST Data SDK
 Home-page: https://github.com/vast-data/vastdb_sdk
 Author: VAST DATA
 Author-email: hello@vastdata.com
 License: Copyright (C) VAST Data Ltd.
-Platform: UNKNOWN
 Classifier: Development Status :: 4 - Beta
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python :: 3
@@ -34,5 +33,3 @@ and [VAST Catalog](https://vastdata.com/blog/vast-catalog-treat-your-file-system
 enabling schema and table management, efficient ingest, query and modification of columnar data.

 For more details, see [our whitepaper](https://vastdata.com/whitepaper/#TheVASTDataBase).
-
-
{vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/RECORD
CHANGED
@@ -148,24 +148,29 @@ vast_flatbuf/tabular/ObjectDetails.py,sha256=qW0WtbkCYYE_L-Kw6VNRDCLYaRm5lKvTbLN
 vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI0,4450
 vast_flatbuf/tabular/VipRange.py,sha256=_BJd1RRZAcK76T9vlsHzXKYVsPVaz6WTEAqStMQCAUQ,2069
 vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vastdb/__init__.py,sha256=
-vastdb/bucket.py,sha256=
-vastdb/
-vastdb/
-vastdb/
-vastdb/
-vastdb/
-vastdb/
-vastdb/
+vastdb/__init__.py,sha256=cMJtZuJ0IL9aKyM3DUWqTCzuP1H1MXXVivKKE1-q0DY,292
+vastdb/bucket.py,sha256=xtKs7S4w0jmI4MujDWH3HDI-iEgbq5Xqqsod-tw4zSo,2991
+vastdb/conftest.py,sha256=pKpo_46Vq4QHzTDQAFxasrVhnZ2V2L-y6IMLxojxaFM,2132
+vastdb/errors.py,sha256=fxpKSxjEgoJZuBtEGWzTW9lpDlEjuzgpgXwAQc1W6BQ,3436
+vastdb/internal_commands.py,sha256=3F6FiYu-Ama1zBO7hENPxCaQYJT8mcZP6rSQvtI7Sks,101273
+vastdb/schema.py,sha256=MrQr-WIrES8KcQ0V6cJkRRp_-9jj9FboyrBnkNBsw-8,3324
+vastdb/session.py,sha256=VZOFGZbAdr5Tl4cp88VRQYnR4Q16UNuYjSmX_QPW1II,1718
+vastdb/table.py,sha256=bdx3C1iWiFivKmtifH7MyG7TMqnVVIU91as-_hMn1rE,20532
+vastdb/transaction.py,sha256=1uCSHXqWcwsMJv6DuNx4WyQMGUm8P-RCCqYdBdUGusI,2196
+vastdb/util.py,sha256=Tjj6p4gqabK5G21uWuCiuYM9FaaR04_Zk5X8NWtcdj8,3022
+vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vastdb/bench/test_perf.py,sha256=iHE3E60fvyU5SBDHPi4h03Dj6QcY6VI9l9mMhgNMtPc,1117
 vastdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vastdb/tests/
-vastdb/tests/test_imports.py,sha256=
-vastdb/tests/
-vastdb/tests/
-vastdb/tests/
-vastdb/tests/
-vastdb
-vastdb
-vastdb-0.1.
-vastdb-0.1.
-vastdb-0.1.
+vastdb/tests/test_duckdb.py,sha256=KDuv4PrjGEwChCGHG36xNT2JiFlBOt6K3DQ3L06Kq-A,1913
+vastdb/tests/test_imports.py,sha256=fDUjO5U-5i4QTIMoNnSSW4X_ZnOStLbx0mJkNq2pj9Q,5033
+vastdb/tests/test_nested.py,sha256=3kejEvtSqV0LrUgb1QglRjrlxnKI4_AXTFw2nE7Q520,951
+vastdb/tests/test_projections.py,sha256=_cDNfD5zTwbCXLk6uGpPUWGN0P-4HElu5OjubWu-Jg0,1255
+vastdb/tests/test_sanity.py,sha256=ixx0QPo73hLHjAa7bByFXjS1XST0WvmSwLEpgnHh_JY,2960
+vastdb/tests/test_schemas.py,sha256=b-JpYHOFYVTdE570_La7O2RWf8BGN-q8KDXNXeC8CSg,1724
+vastdb/tests/test_tables.py,sha256=TXM4LSBvPb3EEu7XScZ5iEiu_zhHClq61W18EQodxw8,25667
+vastdb/tests/util.py,sha256=NaCzKymEGy1xuiyMxyt2_0frKVfVk9iGrFwLf3GHjTI,435
+vastdb-0.1.2.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
+vastdb-0.1.2.dist-info/METADATA,sha256=edJPdDWmHj6tRHRR97eSppfN9_4ARfIr0jS9HMjHfSQ,1311
+vastdb-0.1.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+vastdb-0.1.2.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
+vastdb-0.1.2.dist-info/RECORD,,
{vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/LICENSE
File without changes
{vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/top_level.txt
File without changes