vastdb 0.0.5.3__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. vast_flatbuf/tabular/GetTableStatsResponse.py +45 -1
  2. vast_flatbuf/tabular/VipRange.py +56 -0
  3. vastdb/__init__.py +7 -0
  4. vastdb/bucket.py +77 -0
  5. vastdb/errors.py +158 -0
  6. vastdb/{api.py → internal_commands.py} +280 -746
  7. vastdb/schema.py +77 -0
  8. vastdb/session.py +48 -0
  9. vastdb/table.py +480 -0
  10. vastdb/tests/conftest.py +15 -14
  11. vastdb/tests/test_imports.py +125 -0
  12. vastdb/tests/test_projections.py +41 -0
  13. vastdb/tests/test_sanity.py +36 -16
  14. vastdb/tests/test_schemas.py +12 -6
  15. vastdb/tests/test_tables.py +581 -13
  16. vastdb/transaction.py +55 -0
  17. vastdb/util.py +8 -8
  18. vastdb-0.1.0.dist-info/METADATA +38 -0
  19. {vastdb-0.0.5.3.dist-info → vastdb-0.1.0.dist-info}/RECORD +22 -31
  20. vast_protobuf/__init__.py +0 -0
  21. vast_protobuf/substrait/__init__.py +0 -0
  22. vast_protobuf/substrait/algebra_pb2.py +0 -1344
  23. vast_protobuf/substrait/capabilities_pb2.py +0 -46
  24. vast_protobuf/substrait/ddl_pb2.py +0 -57
  25. vast_protobuf/substrait/extended_expression_pb2.py +0 -49
  26. vast_protobuf/substrait/extensions/__init__.py +0 -0
  27. vast_protobuf/substrait/extensions/extensions_pb2.py +0 -89
  28. vast_protobuf/substrait/function_pb2.py +0 -168
  29. vast_protobuf/substrait/parameterized_types_pb2.py +0 -181
  30. vast_protobuf/substrait/plan_pb2.py +0 -67
  31. vast_protobuf/substrait/type_expressions_pb2.py +0 -198
  32. vast_protobuf/substrait/type_pb2.py +0 -350
  33. vast_protobuf/tabular/__init__.py +0 -0
  34. vast_protobuf/tabular/rpc_pb2.py +0 -344
  35. vastdb/bench_scan.py +0 -45
  36. vastdb/tests/test_create_table_from_parquets.py +0 -50
  37. vastdb/v2.py +0 -360
  38. vastdb-0.0.5.3.dist-info/METADATA +0 -47
  39. {vastdb-0.0.5.3.dist-info → vastdb-0.1.0.dist-info}/LICENSE +0 -0
  40. {vastdb-0.0.5.3.dist-info → vastdb-0.1.0.dist-info}/WHEEL +0 -0
  41. {vastdb-0.0.5.3.dist-info → vastdb-0.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,125 @@
1
+ import pytest
2
+
3
+ from tempfile import NamedTemporaryFile
4
+ import logging
5
+
6
+ import pyarrow as pa
7
+ import pyarrow.parquet as pq
8
+
9
+ from vastdb.errors import InvalidArgument, ImportFilesError
10
+ from vastdb import util
11
+
12
+
13
+ log = logging.getLogger(__name__)
14
+
15
+
16
def test_parallel_imports(session, clean_bucket_name, s3):
    """Import many identical parquet files in one call and verify the row counts."""
    num_rows = 1000
    num_files = 53
    source = pa.Table.from_pydict({'num': list(range(num_rows))})

    # Upload a single parquet object, then fan it out with server-side copies.
    files = []
    with NamedTemporaryFile() as tmp:
        pq.write_table(source, tmp.name)
        s3.put_object(Bucket=clean_bucket_name, Key='prq0', Body=tmp)
        files.append(f'/{clean_bucket_name}/prq0')

    # The copy source never changes, so build it once outside the loop.
    copy_source = {
        'Bucket': clean_bucket_name,
        'Key': 'prq0'
    }
    for idx in range(1, num_files):
        s3.copy(copy_source, clean_bucket_name, f'prq{idx}')
        files.append(f'/{clean_bucket_name}/prq{idx}')

    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')
        table = schema.create_table('t1', pa.schema([('num', pa.int64())]))
        log.info("Starting import of %d files", num_files)
        table.import_files(files)

        # Every imported file contributes num_rows rows.
        full_result = pa.Table.from_batches(table.select(columns=['num']))
        assert full_result.num_rows == num_rows * num_files

        # The value 100 appears exactly once per imported file.
        filtered = pa.Table.from_batches(table.select(columns=['num'], predicate=table['num'] == 100))
        assert filtered.num_rows == num_files
45
+
46
+
47
def test_create_table_from_files(session, clean_bucket_name, s3):
    """Exercise schema inference/merging when creating a table from parquet files."""
    # Five datasets: 0-1 share a schema, 0-3 have schemas contained in a union,
    # and 4 introduces a conflicting column ('mismatch').
    datasets = [
        {'num': [0],
         'varch': ['z']},
        {'num': [1, 2, 3, 4, 5],
         'varch': ['a', 'b', 'c', 'd', 'e']},
        {'num': [1, 2, 3, 4, 5],
         'bool': [True, False, None, None, False],
         'varch': ['a', 'b', 'c', 'd', 'e']},
        {'num': [1, 2],
         'bool': [True, True]},
        {'varch': ['a', 'b', 'c'],
         'mismatch': [1, 2, 3]}
    ]
    for idx, dataset in enumerate(datasets):
        arrow_table = pa.Table.from_pydict(dataset)
        with NamedTemporaryFile() as tmp:
            pq.write_table(arrow_table, tmp.name)
            s3.put_object(Bucket=clean_bucket_name, Key=f'prq{idx}', Body=tmp)

    same_schema_files = [f'/{clean_bucket_name}/prq{idx}' for idx in range(2)]
    contained_schema_files = [f'/{clean_bucket_name}/prq{idx}' for idx in range(4)]
    different_schema_files = [f'/{clean_bucket_name}/prq{idx}' for idx in range(5)]

    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')

        # Default merge accepts files whose schemas are contained in the union.
        created = util.create_table_from_files(schema, 't1', contained_schema_files)
        assert len(created.arrow_schema) == 3
        assert created.arrow_schema == pa.schema([('num', pa.int64()), ('bool', pa.bool_()), ('varch', pa.string())])

        # Conflicting schemas are rejected by the default merge...
        with pytest.raises(InvalidArgument):
            util.create_table_from_files(schema, 't2', different_schema_files)

        # ...and strict merge rejects even contained-but-unequal schemas.
        with pytest.raises(InvalidArgument):
            util.create_table_from_files(schema, 't2', contained_schema_files, schema_merge_func=util.strict_schema_merge)

        # Union merge tolerates differing schemas; strict merge needs identical ones.
        util.create_table_from_files(schema, 't2', different_schema_files, schema_merge_func=util.union_schema_merge)
        util.create_table_from_files(schema, 't3', same_schema_files, schema_merge_func=util.strict_schema_merge)
86
+
87
+
88
def test_import_name_mismatch_error(session, clean_bucket_name, s3):
    """Importing a parquet file with an unknown column name must fail with details."""
    prq_name = 'name_mismatch.parquet'
    payload = pa.Table.from_pydict({'varch': ['a', 'b', 'c'],
                                    'invalid_column_name': [1, 2, 3]})
    with NamedTemporaryFile() as tmp:
        pq.write_table(payload, tmp.name)
        s3.put_object(Bucket=clean_bucket_name, Key=prq_name, Body=tmp)

    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')
        # Target table has 'num', not 'invalid_column_name'.
        target = schema.create_table('t1', pa.schema([('varch', pa.string()), ('num', pa.int64())]))
        with pytest.raises(ImportFilesError) as exc:
            target.import_files([f'/{clean_bucket_name}/{prq_name}'])
        # The error payload should identify the object, the failure kind,
        # and mention the offending column.
        assert exc.value.error_dict['object_name'] == prq_name
        assert exc.value.error_dict['res'] == 'TabularMismatchColumnName'
        assert 'invalid_column_name' in exc.value.error_dict['err_msg']
106
+
107
+
108
def test_import_type_mismatch_error(session, clean_bucket_name, s3):
    """Importing a parquet file with a mistyped column must fail with details."""
    prq_name = 'type_mismatch.parquet'
    payload = pa.Table.from_pydict({'varch': ['a', 'b', 'c'],
                                    'num_type_mismatch': [1, 2, 3]})
    with NamedTemporaryFile() as tmp:
        pq.write_table(payload, tmp.name)
        s3.put_object(Bucket=clean_bucket_name, Key=prq_name, Body=tmp)

    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')
        # Table declares the column as bool while the file carries int64.
        target = schema.create_table('t1', pa.schema([('varch', pa.string()), ('num_type_mismatch', pa.bool_())]))
        with pytest.raises(ImportFilesError) as exc:
            target.import_files([f'/{clean_bucket_name}/{prq_name}'])
        # The error payload should identify the object, the failure kind,
        # and mention the offending column.
        assert exc.value.error_dict['object_name'] == prq_name
        assert exc.value.error_dict['res'] == 'TabularMismatchColumnType'
        assert 'num_type_mismatch' in exc.value.error_dict['err_msg']
@@ -0,0 +1,41 @@
1
+ import pyarrow as pa
2
+ import logging
3
+
4
+ log = logging.getLogger(__name__)
5
+
6
def test_basic_projections(session, clean_bucket_name):
    """Create, list, rename and drop projections on a freshly created table."""
    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')
        columns = pa.schema([
            ('a', pa.int8()),
            ('b', pa.int16()),
            ('c', pa.string()),
            ('d', pa.int16()),
            ('e', pa.int64()),
            ('s', pa.struct([('x', pa.int8()), ('y', pa.int16())]))
        ])

        # The schema starts empty; creating the table makes it listable.
        assert schema.tables() == []
        table = schema.create_table('t1', columns)
        assert schema.tables() == [table]

        # Two projections with different sorted/unsorted column splits.
        p1 = table.create_projection('p1', ['a'], ['b'])
        p2 = table.create_projection('p2', ['b'], ['c', 'd'])

        # Listing must agree with per-name lookup.
        assert table.projections() == [table.projection('p1'), table.projection('p2')]
        p1 = table.projection('p1')
        assert p1.name == 'p1'
        p2 = table.projection('p2')
        assert p2.name == 'p2'

        # Rename one projection, drop the other; only the renamed one remains.
        p1.rename('p_new')
        p2.drop()
        remaining = table.projections()
        assert len(remaining) == 1
        assert remaining[0].name == 'p_new'
@@ -1,26 +1,45 @@
1
- import logging
2
-
3
- import threading
4
1
  from http.server import HTTPServer, BaseHTTPRequestHandler
5
- from vastdb import api
6
2
  from itertools import cycle
3
+ import logging
4
+ import threading
5
+ import contextlib
6
+
7
+ import pytest
8
+ import requests
9
+
10
+ import vastdb
11
+
7
12
 
8
13
  log = logging.getLogger(__name__)
9
14
 
10
- def test_hello_world(rpc):
11
- with rpc.transaction() as tx:
15
+
16
def test_hello_world(session):
    """Opening a transaction must yield a valid (non-None) transaction id."""
    with session.transaction() as tx:
        txid = tx.txid
        assert txid is not None
19
 
20
+
21
def test_bad_credentials(session):
    """Wrong access keys must raise Forbidden when the first transaction starts."""
    bad_session = vastdb.connect(access='BAD', secret='BAD', endpoint=session.api.url)
    # connect() itself succeeds; the failure surfaces on transaction begin.
    with pytest.raises(vastdb.errors.Forbidden), bad_session.transaction():
        pass
26
+
27
+
28
def test_bad_endpoint(session):
    """An unresolvable endpoint must raise a connection error at connect time."""
    bogus_endpoint = 'http://invalid-host-name-for-tests:12345'
    with pytest.raises(requests.exceptions.ConnectionError):
        vastdb.connect(access='BAD', secret='BAD', endpoint=bogus_endpoint)
31
+
32
+
14
33
  def test_version_extraction():
15
34
  # A list of version and expected version parsed by API
16
35
  TEST_CASES = [
17
- (None, None), # vast server without version in header
18
- ("5", None), # major only is not supported
19
- ("5.2", "5.2"), # major.minor
20
- ("5.2.0", "5.2.0"), # major.minor.patch
21
- ("5.2.0.0", "5.2.0.0"), # major.minor.patch.protocol
22
- ("5.2.0.0 some other things", "5.2.0.0"), # Test forward comptibility 1
23
- ("5.2.0.0.20 some other things", "5.2.0.0"), # Test forward comptibility 2
36
+ (None, None), # vast server without version in header
37
+ ("5", None), # major
38
+ ("5.2", None), # major.minor
39
+ ("5.2.0", None), # major.minor.patch
40
+ ("5.2.0.10", "5.2.0.10"), # major.minor.patch.protocol
41
+ ("5.2.0.10 some other things", None), # suffix
42
+ ("5.2.0.10.20", None), # extra version
24
43
  ]
25
44
 
26
45
  # Mock OPTIONS handle that cycles through the test cases response
@@ -55,9 +74,10 @@ def test_version_extraction():
55
74
  server_thread.start()
56
75
 
57
76
  try:
58
- for test_case in TEST_CASES:
59
- tester = api.VastdbApi(endpoint=f"http://localhost:{httpd.server_port}", access_key="abc", secret_key="abc")
60
- assert tester.vast_version == test_case[1]
77
+ for _, expected in TEST_CASES:
78
+ with (pytest.raises(NotImplementedError) if expected is None else contextlib.nullcontext()):
79
+ s = vastdb.connect(endpoint=f"http://localhost:{httpd.server_port}", access="abc", secret="abc")
80
+ assert s.api.vast_version == expected
61
81
  finally:
62
82
  # make sure we shut the server down no matter what
63
83
  httpd.shutdown()
@@ -1,8 +1,8 @@
1
1
  import pytest
2
2
 
3
3
 
4
- def test_schemas(rpc, clean_bucket_name):
5
- with rpc.transaction() as tx:
4
+ def test_schemas(session, clean_bucket_name):
5
+ with session.transaction() as tx:
6
6
  b = tx.bucket(clean_bucket_name)
7
7
  assert b.schemas() == []
8
8
 
@@ -19,8 +19,8 @@ def test_schemas(rpc, clean_bucket_name):
19
19
  assert b.schemas() == []
20
20
 
21
21
 
22
- def test_commits_and_rollbacks(rpc, clean_bucket_name):
23
- with rpc.transaction() as tx:
22
+ def test_commits_and_rollbacks(session, clean_bucket_name):
23
+ with session.transaction() as tx:
24
24
  b = tx.bucket(clean_bucket_name)
25
25
  assert b.schemas() == []
26
26
  b.create_schema("s3")
@@ -28,12 +28,18 @@ def test_commits_and_rollbacks(rpc, clean_bucket_name):
28
28
  # implicit commit
29
29
 
30
30
  with pytest.raises(ZeroDivisionError):
31
- with rpc.transaction() as tx:
31
+ with session.transaction() as tx:
32
32
  b = tx.bucket(clean_bucket_name)
33
33
  b.schema("s3").drop()
34
34
  assert b.schemas() == []
35
35
  1/0 # rollback schema dropping
36
36
 
37
- with rpc.transaction() as tx:
37
+ with session.transaction() as tx:
38
38
  b = tx.bucket(clean_bucket_name)
39
39
  assert b.schemas() != []
40
+
41
def test_list_snapshots(session, clean_bucket_name):
    """A freshly cleaned bucket must report no snapshots."""
    with session.transaction() as tx:
        snapshots = tx.bucket(clean_bucket_name).snapshots()
        assert snapshots == []