vastdb 0.0.5.2__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vast_flatbuf/tabular/GetTableStatsResponse.py +45 -1
- vast_flatbuf/tabular/VipRange.py +56 -0
- vastdb/__init__.py +7 -0
- vastdb/bucket.py +77 -0
- vastdb/errors.py +158 -0
- vastdb/{api.py → internal_commands.py} +283 -747
- vastdb/schema.py +77 -0
- vastdb/session.py +48 -0
- vastdb/table.py +480 -0
- vastdb/tests/conftest.py +46 -0
- vastdb/tests/test_imports.py +125 -0
- vastdb/tests/test_projections.py +41 -0
- vastdb/tests/test_sanity.py +83 -0
- vastdb/tests/test_schemas.py +45 -0
- vastdb/tests/test_tables.py +608 -0
- vastdb/transaction.py +55 -0
- vastdb/util.py +77 -0
- vastdb-0.1.0.dist-info/METADATA +38 -0
- {vastdb-0.0.5.2.dist-info → vastdb-0.1.0.dist-info}/RECORD +23 -24
- vast_protobuf/substrait/__init__.py +0 -0
- vast_protobuf/substrait/algebra_pb2.py +0 -1344
- vast_protobuf/substrait/capabilities_pb2.py +0 -46
- vast_protobuf/substrait/ddl_pb2.py +0 -57
- vast_protobuf/substrait/extended_expression_pb2.py +0 -49
- vast_protobuf/substrait/extensions/__init__.py +0 -0
- vast_protobuf/substrait/extensions/extensions_pb2.py +0 -89
- vast_protobuf/substrait/function_pb2.py +0 -168
- vast_protobuf/substrait/parameterized_types_pb2.py +0 -181
- vast_protobuf/substrait/plan_pb2.py +0 -67
- vast_protobuf/substrait/type_expressions_pb2.py +0 -198
- vast_protobuf/substrait/type_pb2.py +0 -350
- vast_protobuf/tabular/__init__.py +0 -0
- vast_protobuf/tabular/rpc_pb2.py +0 -344
- vastdb/v2.py +0 -108
- vastdb-0.0.5.2.dist-info/METADATA +0 -47
- {vast_protobuf → vastdb/tests}/__init__.py +0 -0
- {vastdb-0.0.5.2.dist-info → vastdb-0.1.0.dist-info}/LICENSE +0 -0
- {vastdb-0.0.5.2.dist-info → vastdb-0.1.0.dist-info}/WHEEL +0 -0
- {vastdb-0.0.5.2.dist-info → vastdb-0.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from tempfile import NamedTemporaryFile
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
import pyarrow as pa
|
|
7
|
+
import pyarrow.parquet as pq
|
|
8
|
+
|
|
9
|
+
from vastdb.errors import InvalidArgument, ImportFilesError
|
|
10
|
+
from vastdb import util
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
log = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_parallel_imports(session, clean_bucket_name, s3):
    """Import many identical Parquet files into one table and verify row counts.

    Uploads a single Parquet object, copies it server-side ``num_files`` times,
    imports every copy within one transaction, then checks both a full column
    scan and a predicate scan return the expected number of rows.
    """
    num_rows = 1000
    num_files = 53
    files = []
    # Plain list(range(...)) instead of a redundant comprehension (ruff C416).
    table = pa.Table.from_pydict({'num': list(range(num_rows))})
    with NamedTemporaryFile() as f:
        pq.write_table(table, f.name)
        # pq.write_table opened the path separately, so `f` is still positioned
        # at offset 0 and the full object body is uploaded.
        s3.put_object(Bucket=clean_bucket_name, Key='prq0', Body=f)
        files.append(f'/{clean_bucket_name}/prq0')

    # The copy source never changes, so build the descriptor once outside the
    # loop; server-side copy avoids re-uploading the same bytes.
    copy_source = {
        'Bucket': clean_bucket_name,
        'Key': 'prq0'
    }
    for i in range(1, num_files):
        s3.copy(copy_source, clean_bucket_name, f'prq{i}')
        files.append(f'/{clean_bucket_name}/prq{i}')

    with session.transaction() as tx:
        b = tx.bucket(clean_bucket_name)
        s = b.create_schema('s1')
        t = s.create_table('t1', pa.schema([('num', pa.int64())]))
        log.info("Starting import of %d files", num_files)
        t.import_files(files)
        arrow_table = pa.Table.from_batches(t.select(columns=['num']))
        assert arrow_table.num_rows == num_rows * num_files
        # Every imported file contains exactly one row whose num == 100.
        arrow_table = pa.Table.from_batches(t.select(columns=['num'], predicate=t['num'] == 100))
        assert arrow_table.num_rows == num_files
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_create_table_from_files(session, clean_bucket_name, s3):
    """Exercise util.create_table_from_files with the default, strict and union
    schema-merge functions over Parquet files whose schemas are identical,
    contained in one another, or genuinely conflicting."""
    datasets = [
        {'num': [0],
         'varch': ['z']},
        {'num': [1, 2, 3, 4, 5],
         'varch': ['a', 'b', 'c', 'd', 'e']},
        {'num': [1, 2, 3, 4, 5],
         'bool': [True, False, None, None, False],
         'varch': ['a', 'b', 'c', 'd', 'e']},
        {'num': [1, 2],
         'bool': [True, True]},
        {'varch': ['a', 'b', 'c'],
         'mismatch': [1, 2, 3]}
    ]
    for idx, data in enumerate(datasets):
        source = pa.Table.from_pydict(data)
        with NamedTemporaryFile() as tmp:
            pq.write_table(source, tmp.name)
            s3.put_object(Bucket=clean_bucket_name, Key=f'prq{idx}', Body=tmp)

    same_schema_files = [f'/{clean_bucket_name}/prq{i}' for i in range(2)]
    contained_schema_files = [f'/{clean_bucket_name}/prq{i}' for i in range(4)]
    different_schema_files = [f'/{clean_bucket_name}/prq{i}' for i in range(5)]

    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')

        # Default merge: contained schemas unify into the superset schema.
        t = util.create_table_from_files(schema, 't1', contained_schema_files)
        assert len(t.arrow_schema) == 3
        assert t.arrow_schema == pa.schema([('num', pa.int64()), ('bool', pa.bool_()), ('varch', pa.string())])

        # Genuinely conflicting column sets are rejected by the default merge.
        with pytest.raises(InvalidArgument):
            util.create_table_from_files(schema, 't2', different_schema_files)

        # Strict merge rejects schemas that are merely contained, not identical.
        with pytest.raises(InvalidArgument):
            util.create_table_from_files(schema, 't2', contained_schema_files, schema_merge_func=util.strict_schema_merge)

        # Union merge tolerates conflicts; strict merge accepts identical schemas.
        util.create_table_from_files(schema, 't2', different_schema_files, schema_merge_func=util.union_schema_merge)
        util.create_table_from_files(schema, 't3', same_schema_files, schema_merge_func=util.strict_schema_merge)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def test_import_name_mismatch_error(session, clean_bucket_name, s3):
    """Importing a Parquet file whose column name is unknown to the table must
    fail with ImportFilesError identifying the object and the mismatch kind."""
    prq_name = 'name_mismatch.parquet'
    source = pa.Table.from_pydict({'varch': ['a', 'b', 'c'],
                                   'invalid_column_name': [1, 2, 3]})
    with NamedTemporaryFile() as tmp:
        pq.write_table(source, tmp.name)
        s3.put_object(Bucket=clean_bucket_name, Key=prq_name, Body=tmp)

    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')
        table = schema.create_table('t1', pa.schema([('varch', pa.string()), ('num', pa.int64())]))
        with pytest.raises(ImportFilesError) as exc:
            table.import_files([f'/{clean_bucket_name}/{prq_name}'])
        details = exc.value.error_dict
        assert details['object_name'] == prq_name
        assert details['res'] == 'TabularMismatchColumnName'
        assert 'invalid_column_name' in details['err_msg']
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_import_type_mismatch_error(session, clean_bucket_name, s3):
    """Importing a Parquet file whose column type disagrees with the table must
    fail with ImportFilesError identifying the object and the mismatch kind."""
    prq_name = 'type_mismatch.parquet'
    source = pa.Table.from_pydict({'varch': ['a', 'b', 'c'],
                                   'num_type_mismatch': [1, 2, 3]})
    with NamedTemporaryFile() as tmp:
        pq.write_table(source, tmp.name)
        s3.put_object(Bucket=clean_bucket_name, Key=prq_name, Body=tmp)

    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')
        # Table declares the column as bool while the file holds int64 values.
        table = schema.create_table('t1', pa.schema([('varch', pa.string()), ('num_type_mismatch', pa.bool_())]))
        with pytest.raises(ImportFilesError) as exc:
            table.import_files([f'/{clean_bucket_name}/{prq_name}'])
        details = exc.value.error_dict
        assert details['object_name'] == prq_name
        assert details['res'] == 'TabularMismatchColumnType'
        assert 'num_type_mismatch' in details['err_msg']
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import pyarrow as pa
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
log = logging.getLogger(__name__)
|
|
5
|
+
|
|
6
|
+
def test_basic_projections(session, clean_bucket_name):
    """Create, list, rename and drop projections on a single table."""
    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')
        columns = pa.schema([
            ('a', pa.int8()),
            ('b', pa.int16()),
            ('c', pa.string()),
            ('d', pa.int16()),
            ('e', pa.int64()),
            ('s', pa.struct([('x', pa.int8()), ('y', pa.int16())]))
        ])

        assert schema.tables() == []
        table = schema.create_table('t1', columns)
        assert schema.tables() == [table]

        # Each projection takes a list of sorted columns and unsorted columns.
        table.create_projection('p1', ['a'], ['b'])
        table.create_projection('p2', ['b'], ['c', 'd'])

        assert table.projections() == [table.projection('p1'), table.projection('p2')]
        first = table.projection('p1')
        assert first.name == 'p1'
        second = table.projection('p2')
        assert second.name == 'p2'

        first.rename('p_new')
        second.drop()
        remaining = table.projections()
        assert len(remaining) == 1
        assert remaining[0].name == 'p_new'
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
2
|
+
from itertools import cycle
|
|
3
|
+
import logging
|
|
4
|
+
import threading
|
|
5
|
+
import contextlib
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
import vastdb
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
log = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_hello_world(session):
    """Smoke test: a transaction can be opened and carries a transaction id."""
    with session.transaction() as tx:
        txid = tx.txid
        assert txid is not None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_bad_credentials(session):
    """Wrong access/secret keys must surface Forbidden when a transaction starts."""
    bad_session = vastdb.connect(access='BAD', secret='BAD', endpoint=session.api.url)
    with pytest.raises(vastdb.errors.Forbidden), bad_session.transaction():
        pass
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_bad_endpoint(session):
    """Connecting to an unresolvable endpoint must raise a ConnectionError."""
    bogus_endpoint = 'http://invalid-host-name-for-tests:12345'
    with pytest.raises(requests.exceptions.ConnectionError):
        vastdb.connect(access='BAD', secret='BAD', endpoint=bogus_endpoint)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_version_extraction():
    """Verify how the API parses the VAST version from the HTTP Server header.

    Spins up a local mock HTTP server whose OPTIONS handler cycles through the
    test-case version strings; only a four-component
    ``major.minor.patch.protocol`` version is accepted — every other form must
    raise NotImplementedError at connect time.
    """
    # (server header version, expected parsed version) pairs.
    TEST_CASES = [
        (None, None),  # vast server without version in header
        ("5", None),  # major
        ("5.2", None),  # major.minor
        ("5.2.0", None),  # major.minor.patch
        ("5.2.0.10", "5.2.0.10"),  # major.minor.patch.protocol
        ("5.2.0.10 some other things", None),  # suffix
        ("5.2.0.10.20", None),  # extra version
    ]

    # Mock OPTIONS handler that cycles through the test cases' responses.
    # (The original no-op __init__ that only called super() was removed.)
    class MockOptionsHandler(BaseHTTPRequestHandler):
        versions_iterator = cycle(TEST_CASES)

        def do_OPTIONS(self):
            self.send_response(204)
            self.end_headers()

        def version_string(self):
            # Advances once per request, so each connect() sees the next case.
            version = next(self.versions_iterator)[0]
            return f"vast {version}" if version else "vast"

        def log_message(self, format, *args):
            log.debug(format, *args)

    # Bind to an OS-assigned free port on localhost.
    server_address = ('localhost', 0)
    httpd = HTTPServer(server_address, MockOptionsHandler)

    def start_http_server_in_thread():
        # Lazy %-style args instead of an eagerly-formatted f-string.
        log.info("Mock HTTP server is running on port %d", httpd.server_port)
        httpd.serve_forever()
        log.info("Mock HTTP server killed")

    # Run the server in a thread so the main thread can drive the client API.
    server_thread = threading.Thread(target=start_http_server_in_thread)
    server_thread.start()

    try:
        for _, expected in TEST_CASES:
            with (pytest.raises(NotImplementedError) if expected is None else contextlib.nullcontext()):
                s = vastdb.connect(endpoint=f"http://localhost:{httpd.server_port}", access="abc", secret="abc")
                assert s.api.vast_version == expected
    finally:
        # Always stop the server AND reap the thread, even on failure,
        # so the test never leaks a running background thread.
        httpd.shutdown()
        server_thread.join()
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_schemas(session, clean_bucket_name):
    """Create, rename and drop a schema, checking the bucket listing at each step."""
    with session.transaction() as tx:
        bucket = tx.bucket(clean_bucket_name)
        assert bucket.schemas() == []

        schema = bucket.create_schema('s1')
        assert schema.bucket == bucket
        assert bucket.schemas() == [schema]

        schema.rename('s2')
        assert schema.bucket == bucket
        assert schema.name == 's2'
        assert bucket.schemas()[0].name == 's2'

        schema.drop()
        assert bucket.schemas() == []
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_commits_and_rollbacks(session, clean_bucket_name):
    """A committed schema creation survives a later drop that gets rolled back."""
    with session.transaction() as tx:
        bucket = tx.bucket(clean_bucket_name)
        assert bucket.schemas() == []
        bucket.create_schema("s3")
        assert bucket.schemas() != []
        # clean exit from the context manager commits implicitly

    with pytest.raises(ZeroDivisionError):
        with session.transaction() as tx:
            bucket = tx.bucket(clean_bucket_name)
            bucket.schema("s3").drop()
            assert bucket.schemas() == []
            1/0  # force an exception so the schema drop is rolled back

    with session.transaction() as tx:
        assert tx.bucket(clean_bucket_name).schemas() != []
|
|
40
|
+
|
|
41
|
+
def test_list_snapshots(session, clean_bucket_name):
    """A freshly-cleaned bucket exposes no snapshots."""
    with session.transaction() as tx:
        bucket = tx.bucket(clean_bucket_name)
        assert bucket.snapshots() == []
|