vastdb 0.0.5.3__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vast_flatbuf/tabular/GetTableStatsResponse.py +45 -1
- vast_flatbuf/tabular/VipRange.py +56 -0
- vastdb/__init__.py +7 -0
- vastdb/bench/test_perf.py +29 -0
- vastdb/bucket.py +85 -0
- vastdb/{tests/conftest.py → conftest.py} +29 -14
- vastdb/errors.py +175 -0
- vastdb/{api.py → internal_commands.py} +373 -875
- vastdb/schema.py +85 -0
- vastdb/session.py +47 -0
- vastdb/table.py +483 -0
- vastdb/tests/test_imports.py +123 -0
- vastdb/tests/test_nested.py +28 -0
- vastdb/tests/test_projections.py +42 -0
- vastdb/tests/test_sanity.py +34 -15
- vastdb/tests/test_schemas.py +30 -6
- vastdb/tests/test_tables.py +628 -13
- vastdb/tests/util.py +18 -0
- vastdb/transaction.py +54 -0
- vastdb/util.py +11 -10
- vastdb-0.1.1.dist-info/METADATA +38 -0
- {vastdb-0.0.5.3.dist-info → vastdb-0.1.1.dist-info}/RECORD +26 -31
- vast_protobuf/substrait/__init__.py +0 -0
- vast_protobuf/substrait/algebra_pb2.py +0 -1344
- vast_protobuf/substrait/capabilities_pb2.py +0 -46
- vast_protobuf/substrait/ddl_pb2.py +0 -57
- vast_protobuf/substrait/extended_expression_pb2.py +0 -49
- vast_protobuf/substrait/extensions/__init__.py +0 -0
- vast_protobuf/substrait/extensions/extensions_pb2.py +0 -89
- vast_protobuf/substrait/function_pb2.py +0 -168
- vast_protobuf/substrait/parameterized_types_pb2.py +0 -181
- vast_protobuf/substrait/plan_pb2.py +0 -67
- vast_protobuf/substrait/type_expressions_pb2.py +0 -198
- vast_protobuf/substrait/type_pb2.py +0 -350
- vast_protobuf/tabular/__init__.py +0 -0
- vast_protobuf/tabular/rpc_pb2.py +0 -344
- vastdb/bench_scan.py +0 -45
- vastdb/tests/test_create_table_from_parquets.py +0 -50
- vastdb/v2.py +0 -360
- vastdb-0.0.5.3.dist-info/METADATA +0 -47
- {vast_protobuf → vastdb/bench}/__init__.py +0 -0
- {vastdb-0.0.5.3.dist-info → vastdb-0.1.1.dist-info}/LICENSE +0 -0
- {vastdb-0.0.5.3.dist-info → vastdb-0.1.1.dist-info}/WHEEL +0 -0
- {vastdb-0.0.5.3.dist-info → vastdb-0.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from tempfile import NamedTemporaryFile
|
|
3
|
+
|
|
4
|
+
import pyarrow as pa
|
|
5
|
+
import pyarrow.parquet as pq
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from vastdb import util
|
|
9
|
+
from vastdb.errors import ImportFilesError, InvalidArgument
|
|
10
|
+
|
|
11
|
+
log = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_parallel_imports(session, clean_bucket_name, s3):
    """Import many Parquet objects into one table and verify row counts.

    Uploads a single Parquet object, server-side-copies it into
    ``num_files`` objects, imports them all in one ``import_files`` call,
    and checks that a full scan and a predicate scan each return the
    expected number of rows.
    """
    num_rows = 1000
    num_files = 53
    ds = {'num': list(range(num_rows))}  # plain list(range(...)), not a redundant comprehension
    files = []
    table = pa.Table.from_pydict(ds)
    with NamedTemporaryFile() as f:
        pq.write_table(table, f.name)
        # NOTE(review): write_table writes via the *path*, so `f`'s own file
        # position is still 0 and put_object streams the fresh Parquet bytes.
        # This relies on the temp file being reopenable by name (POSIX).
        s3.put_object(Bucket=clean_bucket_name, Key='prq0', Body=f)
        files.append(f'/{clean_bucket_name}/prq0')

    # Duplicate the uploaded object server-side; all copies are identical.
    for i in range(1, num_files):
        copy_source = {
            'Bucket': clean_bucket_name,
            'Key': 'prq0'
        }
        s3.copy(copy_source, clean_bucket_name, f'prq{i}')
        files.append(f'/{clean_bucket_name}/prq{i}')

    with session.transaction() as tx:
        b = tx.bucket(clean_bucket_name)
        s = b.create_schema('s1')
        t = s.create_table('t1', pa.schema([('num', pa.int64())]))
        log.info("Starting import of %d files", num_files)
        t.import_files(files)
        arrow_table = pa.Table.from_batches(t.select(columns=['num']))
        assert arrow_table.num_rows == num_rows * num_files
        # Each imported copy contains exactly one row with num == 100.
        arrow_table = pa.Table.from_batches(t.select(columns=['num'], predicate=t['num'] == 100))
        assert arrow_table.num_rows == num_files
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_create_table_from_files(session, clean_bucket_name, s3):
    """Exercise ``util.create_table_from_files`` against Parquet objects
    whose schemas are identical, contained in one another, or conflicting,
    under both the strict and the union merge strategies."""
    datasets = [
        {'num': [0],
         'varch': ['z']},
        {'num': [1, 2, 3, 4, 5],
         'varch': ['a', 'b', 'c', 'd', 'e']},
        {'num': [1, 2, 3, 4, 5],
         'bool': [True, False, None, None, False],
         'varch': ['a', 'b', 'c', 'd', 'e']},
        {'num': [1, 2],
         'bool': [True, True]},
        {'varch': ['a', 'b', 'c'],
         'mismatch': [1, 2, 3]}
    ]
    # Upload each dataset as its own Parquet object: prq0 .. prq4.
    for idx, data in enumerate(datasets):
        with NamedTemporaryFile() as tmp:
            pq.write_table(pa.Table.from_pydict(data), tmp.name)
            s3.put_object(Bucket=clean_bucket_name, Key=f'prq{idx}', Body=tmp)

    same_schema_files = [f'/{clean_bucket_name}/prq{i}' for i in range(2)]
    contained_schema_files = [f'/{clean_bucket_name}/prq{i}' for i in range(4)]
    different_schema_files = [f'/{clean_bucket_name}/prq{i}' for i in range(5)]

    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')

        # Default merge accepts schemas that are subsets of one another.
        t = util.create_table_from_files(schema, 't1', contained_schema_files)
        assert len(t.arrow_schema) == 3
        assert t.arrow_schema == pa.schema([('num', pa.int64()), ('bool', pa.bool_()), ('varch', pa.string())])

        # Conflicting columns are rejected by the default merge...
        with pytest.raises(InvalidArgument):
            util.create_table_from_files(schema, 't2', different_schema_files)

        # ...and strict merge rejects even contained (non-identical) schemas.
        with pytest.raises(InvalidArgument):
            util.create_table_from_files(schema, 't2', contained_schema_files, schema_merge_func=util.strict_schema_merge)

        # Union merge tolerates conflicts; strict merge accepts identical schemas.
        util.create_table_from_files(schema, 't2', different_schema_files, schema_merge_func=util.union_schema_merge)
        util.create_table_from_files(schema, 't3', same_schema_files, schema_merge_func=util.strict_schema_merge)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_import_name_mismatch_error(session, clean_bucket_name, s3):
    """Importing a Parquet file with a column name absent from the target
    table must raise ImportFilesError carrying the mismatch details."""
    prq_name = 'name_mismatch.parquet'
    source = pa.Table.from_pydict({'varch': ['a', 'b', 'c'],
                                   'invalid_column_name': [1, 2, 3]})
    with NamedTemporaryFile() as tmp:
        pq.write_table(source, tmp.name)
        s3.put_object(Bucket=clean_bucket_name, Key=prq_name, Body=tmp)

    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')
        table = schema.create_table('t1', pa.schema([('varch', pa.string()), ('num', pa.int64())]))
        with pytest.raises(ImportFilesError) as exc:
            table.import_files([f'/{clean_bucket_name}/{prq_name}'])
        details = exc.value.error_dict
        assert details['object_name'] == prq_name
        assert details['res'] == 'TabularMismatchColumnName'
        assert 'invalid_column_name' in details['err_msg']
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def test_import_type_mismatch_error(session, clean_bucket_name, s3):
    """Importing a Parquet file whose column type disagrees with the target
    table must raise ImportFilesError carrying the mismatch details."""
    prq_name = 'type_mismatch.parquet'
    source = pa.Table.from_pydict({'varch': ['a', 'b', 'c'],
                                   'num_type_mismatch': [1, 2, 3]})
    with NamedTemporaryFile() as tmp:
        pq.write_table(source, tmp.name)
        s3.put_object(Bucket=clean_bucket_name, Key=prq_name, Body=tmp)

    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')
        # The table declares the column as bool; the file holds int64.
        table = schema.create_table('t1', pa.schema([('varch', pa.string()), ('num_type_mismatch', pa.bool_())]))
        with pytest.raises(ImportFilesError) as exc:
            table.import_files([f'/{clean_bucket_name}/{prq_name}'])
        details = exc.value.error_dict
        assert details['object_name'] == prq_name
        assert details['res'] == 'TabularMismatchColumnType'
        assert 'num_type_mismatch' in details['err_msg']
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
|
|
3
|
+
import pyarrow as pa
|
|
4
|
+
|
|
5
|
+
from .util import prepare_data
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_nested(session, clean_bucket_name):
    """Round-trip nested Arrow types (list, map, struct) through a table,
    then verify every column-permutation selection matches the source."""
    columns = pa.schema([
        ('l', pa.list_(pa.int8())),
        ('m', pa.map_(pa.utf8(), pa.float64())),
        ('s', pa.struct([('x', pa.int16()), ('y', pa.int32())])),
    ])
    expected = pa.table(schema=columns, data=[
        [[1], [], [2, 3], None],
        [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
        [{'x': 1, 'y': None}, None, {'x': 2, 'y': 3}, {'x': None, 'y': 4}],
    ])

    with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
        assert pa.Table.from_batches(t.select()) == expected

        # Every ordered subset of columns (all sizes, all orders) must
        # project correctly.
        field_names = [field.name for field in columns]
        subsets = itertools.chain.from_iterable(
            itertools.permutations(field_names, size)
            for size in range(len(field_names) + 1)
        )
        for cols in subsets:
            actual = pa.Table.from_batches(t.select(columns=cols))
            assert actual == expected.select(cols)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import pyarrow as pa
|
|
4
|
+
|
|
5
|
+
log = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
def test_basic_projections(session, clean_bucket_name):
    """Create, list, rename and drop table projections.

    Creates two projections, verifies they are listed and can be looked up
    by name, then renames one and drops the other.
    """
    with session.transaction() as tx:
        s = tx.bucket(clean_bucket_name).create_schema('s1')
        columns = pa.schema([
            ('a', pa.int8()),
            ('b', pa.int16()),
            ('c', pa.string()),
            ('d', pa.int16()),
            ('e', pa.int64()),
            ('s', pa.struct([('x', pa.int8()), ('y', pa.int16())]))
        ])

        assert s.tables() == []
        t = s.create_table('t1', columns)
        assert s.tables() == [t]

        # Return values are intentionally discarded: the projections are
        # re-fetched below via t.projection() to exercise the lookup path.
        t.create_projection('p1', ['a'], ['b'])
        t.create_projection('p2', ['b'], ['c', 'd'])

        projs = t.projections()
        assert projs == [t.projection('p1'), t.projection('p2')]
        p1 = t.projection('p1')
        assert p1.name == 'p1'
        p2 = t.projection('p2')
        assert p2.name == 'p2'

        p1.rename('p_new')
        p2.drop()
        projs = t.projections()
        assert len(projs) == 1
        assert projs[0].name == 'p_new'
|
vastdb/tests/test_sanity.py
CHANGED
|
@@ -1,26 +1,44 @@
|
|
|
1
|
+
import contextlib
|
|
1
2
|
import logging
|
|
2
|
-
|
|
3
3
|
import threading
|
|
4
|
-
from http.server import
|
|
5
|
-
from vastdb import api
|
|
4
|
+
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
6
5
|
from itertools import cycle
|
|
7
6
|
|
|
7
|
+
import pytest
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
import vastdb
|
|
11
|
+
|
|
8
12
|
log = logging.getLogger(__name__)
|
|
9
13
|
|
|
10
|
-
|
|
11
|
-
|
|
14
|
+
|
|
15
|
+
def test_hello_world(session):
    """Smoke test: a freshly started transaction exposes a non-None txid."""
    with session.transaction() as txn:
        assert txn.txid is not None
|
|
13
18
|
|
|
19
|
+
|
|
20
|
+
def test_bad_credentials(session):
    """Connecting with wrong keys must raise Forbidden on the first RPC."""
    bad_session = vastdb.connect(access='BAD', secret='BAD', endpoint=session.api.url)
    with pytest.raises(vastdb.errors.Forbidden), bad_session.transaction():
        pass
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_bad_endpoint(session):
    """An unresolvable endpoint must fail with ConnectionError at connect time."""
    unreachable = 'http://invalid-host-name-for-tests:12345'
    with pytest.raises(requests.exceptions.ConnectionError):
        vastdb.connect(access='BAD', secret='BAD', endpoint=unreachable)
|
|
30
|
+
|
|
31
|
+
|
|
14
32
|
def test_version_extraction():
|
|
15
33
|
# A list of version and expected version parsed by API
|
|
16
34
|
TEST_CASES = [
|
|
17
|
-
(None, None),
|
|
18
|
-
("5", None), # major
|
|
19
|
-
("5.2",
|
|
20
|
-
("5.2.0",
|
|
21
|
-
("5.2.0.
|
|
22
|
-
("5.2.0.
|
|
23
|
-
("5.2.0.
|
|
35
|
+
(None, None), # vast server without version in header
|
|
36
|
+
("5", None), # major
|
|
37
|
+
("5.2", None), # major.minor
|
|
38
|
+
("5.2.0", None), # major.minor.patch
|
|
39
|
+
("5.2.0.10", "5.2.0.10"), # major.minor.patch.protocol
|
|
40
|
+
("5.2.0.10 some other things", None), # suffix
|
|
41
|
+
("5.2.0.10.20", None), # extra version
|
|
24
42
|
]
|
|
25
43
|
|
|
26
44
|
# Mock OPTIONS handle that cycles through the test cases response
|
|
@@ -55,9 +73,10 @@ def test_version_extraction():
|
|
|
55
73
|
server_thread.start()
|
|
56
74
|
|
|
57
75
|
try:
|
|
58
|
-
for
|
|
59
|
-
|
|
60
|
-
|
|
76
|
+
for _, expected in TEST_CASES:
|
|
77
|
+
with (pytest.raises(NotImplementedError) if expected is None else contextlib.nullcontext()):
|
|
78
|
+
s = vastdb.connect(endpoint=f"http://localhost:{httpd.server_port}", access="abc", secret="abc")
|
|
79
|
+
assert s.api.vast_version == expected
|
|
61
80
|
finally:
|
|
62
81
|
# make sure we shut the server down no matter what
|
|
63
82
|
httpd.shutdown()
|
vastdb/tests/test_schemas.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import pytest
|
|
2
2
|
|
|
3
|
+
from .. import errors
|
|
3
4
|
|
|
4
|
-
|
|
5
|
-
|
|
5
|
+
|
|
6
|
+
def test_schemas(session, clean_bucket_name):
|
|
7
|
+
with session.transaction() as tx:
|
|
6
8
|
b = tx.bucket(clean_bucket_name)
|
|
7
9
|
assert b.schemas() == []
|
|
8
10
|
|
|
@@ -19,8 +21,24 @@ def test_schemas(rpc, clean_bucket_name):
|
|
|
19
21
|
assert b.schemas() == []
|
|
20
22
|
|
|
21
23
|
|
|
22
|
-
def
|
|
23
|
-
with
|
|
24
|
+
def test_exists(session, clean_bucket_name):
    """Re-creating an existing schema raises SchemaExists unless the caller
    passes fail_if_exists=False, in which case the existing schema is returned."""
    with session.transaction() as tx:
        bucket = tx.bucket(clean_bucket_name)
        assert bucket.schemas() == []

        schema = bucket.create_schema('s1')

        assert bucket.schemas() == [schema]
        with pytest.raises(errors.SchemaExists):
            bucket.create_schema('s1')

        # The failed create must not have changed anything.
        assert bucket.schemas() == [schema]
        assert bucket.create_schema('s1', fail_if_exists=False) == schema
        assert bucket.schemas() == [schema]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_commits_and_rollbacks(session, clean_bucket_name):
|
|
41
|
+
with session.transaction() as tx:
|
|
24
42
|
b = tx.bucket(clean_bucket_name)
|
|
25
43
|
assert b.schemas() == []
|
|
26
44
|
b.create_schema("s3")
|
|
@@ -28,12 +46,18 @@ def test_commits_and_rollbacks(rpc, clean_bucket_name):
|
|
|
28
46
|
# implicit commit
|
|
29
47
|
|
|
30
48
|
with pytest.raises(ZeroDivisionError):
|
|
31
|
-
with
|
|
49
|
+
with session.transaction() as tx:
|
|
32
50
|
b = tx.bucket(clean_bucket_name)
|
|
33
51
|
b.schema("s3").drop()
|
|
34
52
|
assert b.schemas() == []
|
|
35
53
|
1/0 # rollback schema dropping
|
|
36
54
|
|
|
37
|
-
with
|
|
55
|
+
with session.transaction() as tx:
|
|
38
56
|
b = tx.bucket(clean_bucket_name)
|
|
39
57
|
assert b.schemas() != []
|
|
58
|
+
|
|
59
|
+
def test_list_snapshots(session, clean_bucket_name):
    """A freshly cleaned bucket reports no snapshots."""
    with session.transaction() as tx:
        snapshots = tx.bucket(clean_bucket_name).snapshots()
        assert snapshots == []
|