PyPI - vastdb - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

vastdb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

vastdb/__init__.py +6 -2
vastdb/bench/__init__.py +0 -0
vastdb/bench/test_perf.py +29 -0
vastdb/bucket.py +21 -9
vastdb/{tests/conftest.py → conftest.py} +21 -7
vastdb/errors.py +32 -9
vastdb/internal_commands.py +236 -278
vastdb/schema.py +22 -9
vastdb/session.py +2 -3
vastdb/table.py +57 -57
vastdb/tests/test_duckdb.py +61 -0
vastdb/tests/test_imports.py +3 -5
vastdb/tests/test_nested.py +28 -0
vastdb/tests/test_projections.py +3 -1
vastdb/tests/test_sanity.py +5 -6
vastdb/tests/test_schemas.py +20 -1
vastdb/tests/test_tables.py +108 -76
vastdb/tests/util.py +15 -0
vastdb/transaction.py +18 -9
vastdb/util.py +6 -4
{vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/METADATA +1 -4
{vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/RECORD +25 -20
{vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/WHEEL +1 -1
{vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/LICENSE +0 -0
{vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/top_level.txt +0 -0

vastdb/__init__.py CHANGED Viewed

@@ -1,7 +1,11 @@
 """VAST Database Python SDK."""
+import functools
 from . import session
 # A helper function, useful as a short-hand for Session c-tor: `session = vastdb.connect(...)`
-connect = session.Session
-connect.__name__ = 'connect'
+@functools.wraps(session.Session)
+def connect(*args, **kwargs):  # noqa: D103
+    return session.Session(*args, **kwargs)

vastdb/bench/__init__.py ADDED Viewed

File without changes

vastdb/bench/test_perf.py ADDED Viewed

@@ -0,0 +1,29 @@
+import logging
+import time
+import pyarrow as pa
+import pytest
+from vastdb import util
+from vastdb.table import ImportConfig, QueryConfig
+log = logging.getLogger(__name__)
+@pytest.mark.benchmark
+def test_bench(session, clean_bucket_name, parquets_path, crater_path):
+    files = [str(parquets_path / f) for f in (parquets_path.glob('**/*.pq'))]
+    with session.transaction() as tx:
+        b = tx.bucket(clean_bucket_name)
+        s = b.create_schema('s1')
+        t = util.create_table_from_files(s, 't1', files, config=ImportConfig(import_concurrency=8))
+        config = QueryConfig(num_splits=8, num_sub_splits=4)
+        s = time.time()
+        pa_table = pa.Table.from_batches(t.select(columns=['sid'], predicate=t['sid'] == 10033007, config=config))
+        e = time.time()
+        log.info("'SELECT sid from TABLE WHERE sid = 10033007' returned in %s seconds.", e - s)
+        if crater_path:
+            with open(f'{crater_path}/bench_results', 'a') as f:
+                f.write(f"'SELECT sid FROM TABLE WHERE sid = 10033007' returned in {e - s} seconds")
+        assert pa_table.num_rows == 255_075

vastdb/bucket.py CHANGED Viewed

@@ -4,10 +4,14 @@ VAST S3 buckets can be used to create Database schemas and tables.
 It is possible to list and access VAST snapshots generated over a bucket.
 """
+import logging
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, List, Optional
 from . import errors, schema, transaction
-from dataclasses import dataclass
-import logging
+if TYPE_CHECKING:
+    from .schema import Schema
 log = logging.getLogger(__name__)
@@ -27,30 +31,38 @@ class Bucket:
     name: str
     tx: "transaction.Transaction"
-    def create_schema(self, path: str) -> "schema.Schema":
+    def create_schema(self, path: str, fail_if_exists=True) -> "Schema":
         """Create a new schema (a container of tables) under this bucket."""
+        if current := self.schema(path, fail_if_missing=False):
+            if fail_if_exists:
+                raise errors.SchemaExists(self.name, path)
+            else:
+                return current
         self.tx._rpc.api.create_schema(self.name, path, txid=self.tx.txid)
         log.info("Created schema: %s", path)
-        return self.schema(path)
+        return self.schema(path)  # type: ignore[return-value]
-    def schema(self, path: str) -> "schema.Schema":
+    def schema(self, path: str, fail_if_missing=True) -> Optional["Schema"]:
         """Get a specific schema (a container of tables) under this bucket."""
         s = self.schemas(path)
         log.debug("schema: %s", s)
         if not s:
-            raise errors.MissingSchema(self.name, path)
+            if fail_if_missing:
+                raise errors.MissingSchema(self.name, path)
+            else:
+                return None
         assert len(s) == 1, f"Expected to receive only a single schema, but got: {len(s)}. ({s})"
         log.debug("Found schema: %s", s[0].name)
         return s[0]
-    def schemas(self, name: str = None) -> ["schema.Schema"]:
+    def schemas(self, name: Optional[str] = None) -> List["Schema"]:
         """List bucket's schemas."""
         schemas = []
         next_key = 0
         exact_match = bool(name)
         log.debug("list schemas param: schema=%s, exact_match=%s", name, exact_match)
         while True:
-            bucket_name, curr_schemas, next_key, is_truncated, _ = \
+            _bucket_name, curr_schemas, next_key, is_truncated, _ = \
                 self.tx._rpc.api.list_schemas(bucket=self.name, next_key=next_key, txid=self.tx.txid,
                                                name_prefix=name, exact_match=exact_match)
             if not curr_schemas:
@@ -61,7 +73,7 @@ class Bucket:
         return [schema.Schema(name=name, bucket=self) for name, *_ in schemas]
-    def snapshots(self) -> [Snapshot]:
+    def snapshots(self) -> List[Snapshot]:
         """List bucket's snapshots."""
         snapshots = []
         next_key = 0

vastdb/{tests/conftest.py → conftest.py} RENAMED Viewed

@@ -1,15 +1,19 @@
-import vastdb
+import os
+from pathlib import Path
-import pytest
 import boto3
-import os
+import pytest
+import vastdb
 def pytest_addoption(parser):
-    parser.addoption("--tabular-bucket-name", help="Name of the S3 bucket with Tabular enabled", default = "vastdb")
-    parser.addoption("--tabular-access-key", help="Access key with Tabular permissions (AWS_ACCESS_KEY_ID)", default = os.environ.get("AWS_ACCESS_KEY_ID", None))
-    parser.addoption("--tabular-secret-key", help="Secret key with Tabular permissions (AWS_SECRET_ACCESS_KEY)" , default = os.environ.get("AWS_SECRET_ACCESS_KEY", None))
-    parser.addoption("--tabular-endpoint-url", help="Tabular server endpoint", default = "http://localhost:9090")
+    parser.addoption("--tabular-bucket-name", help="Name of the S3 bucket with Tabular enabled", default="vastdb")
+    parser.addoption("--tabular-access-key", help="Access key with Tabular permissions (AWS_ACCESS_KEY_ID)", default=os.environ.get("AWS_ACCESS_KEY_ID", None))
+    parser.addoption("--tabular-secret-key", help="Secret key with Tabular permissions (AWS_SECRET_ACCESS_KEY)", default=os.environ.get("AWS_SECRET_ACCESS_KEY", None))
+    parser.addoption("--tabular-endpoint-url", help="Tabular server endpoint", default="http://localhost:9090")
+    parser.addoption("--data-path", help="Data files location", default=None)
+    parser.addoption("--crater-path", help="Save benchmark results in a dedicated location", default=None)
 @pytest.fixture(scope="session")
@@ -44,3 +48,13 @@ def s3(request):
         aws_access_key_id=request.config.getoption("--tabular-access-key"),
         aws_secret_access_key=request.config.getoption("--tabular-secret-key"),
         endpoint_url=request.config.getoption("--tabular-endpoint-url"))
+@pytest.fixture(scope="function")
+def parquets_path(request):
+    return Path(request.config.getoption("--data-path"))
+@pytest.fixture(scope="function")
+def crater_path(request):
+    return request.config.getoption("--crater-path")

vastdb/errors.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import logging
-import requests
 import xml.etree.ElementTree
-from enum import Enum
 from dataclasses import dataclass
+from enum import Enum
+import requests
 class HttpStatus(Enum):
@@ -26,6 +26,7 @@ log = logging.getLogger(__name__)
 class HttpError(Exception):
     code: str
     message: str
+    method: str
     url: str
     status: int  # HTTP status
     headers: requests.structures.CaseInsensitiveDict  # HTTP response headers
@@ -88,6 +89,10 @@ class Missing(Exception):
     pass
+class MissingTransaction(Missing):
+    pass
 @dataclass
 class MissingBucket(Missing):
     bucket: str
@@ -114,6 +119,23 @@ class MissingProjection(Missing):
     projection: str
+class Exists(Exception):
+    pass
+@dataclass
+class SchemaExists(Exists):
+    bucket: str
+    schema: str
+@dataclass
+class TableExists(Exists):
+    bucket: str
+    schema: str
+    table: str
 ERROR_TYPES_MAP = {
     HttpStatus.BAD_REQUEST: BadRequest,
     HttpStatus.FOBIDDEN: Forbidden,
@@ -133,21 +155,22 @@ def from_response(res: requests.Response):
     log.debug("response: url='%s', code=%s, headers=%s, body='%s'", res.request.url, res.status_code, res.headers, res.text)
     # try to parse S3 XML response for the error details:
-    code = None
-    message = None
+    code_str = None
+    message_str = None
     if res.text:
         try:
             root = xml.etree.ElementTree.fromstring(res.text)
             code = root.find('Code')
-            code = code.text if code is not None else None
+            code_str = code.text if code is not None else None
             message = root.find('Message')
-            message = message.text if message is not None else None
+            message_str = message.text if message is not None else None
         except xml.etree.ElementTree.ParseError:
             log.debug("invalid XML: %r", res.text)
     kwargs = dict(
-        code=code,
-        message=message,
+        code=code_str,
+        message=message_str,
+        method=res.request.method,
         url=res.request.url,
         status=res.status_code,
         headers=res.headers,

vastdb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

vastdb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl