vastdb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/__init__.py +6 -2
- vastdb/bench/__init__.py +0 -0
- vastdb/bench/test_perf.py +29 -0
- vastdb/bucket.py +21 -9
- vastdb/{tests/conftest.py → conftest.py} +21 -7
- vastdb/errors.py +32 -9
- vastdb/internal_commands.py +236 -278
- vastdb/schema.py +22 -9
- vastdb/session.py +2 -3
- vastdb/table.py +57 -57
- vastdb/tests/test_duckdb.py +61 -0
- vastdb/tests/test_imports.py +3 -5
- vastdb/tests/test_nested.py +28 -0
- vastdb/tests/test_projections.py +3 -1
- vastdb/tests/test_sanity.py +5 -6
- vastdb/tests/test_schemas.py +20 -1
- vastdb/tests/test_tables.py +108 -76
- vastdb/tests/util.py +15 -0
- vastdb/transaction.py +18 -9
- vastdb/util.py +6 -4
- {vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/METADATA +1 -4
- {vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/RECORD +25 -20
- {vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/WHEEL +1 -1
- {vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/LICENSE +0 -0
- {vastdb-0.1.0.dist-info → vastdb-0.1.2.dist-info}/top_level.txt +0 -0
vastdb/__init__.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
"""VAST Database Python SDK."""
|
|
2
2
|
|
|
3
|
+
import functools
|
|
4
|
+
|
|
3
5
|
from . import session
|
|
4
6
|
|
|
7
|
+
|
|
5
8
|
# A helper function, useful as a short-hand for Session c-tor: `session = vastdb.connect(...)`
|
|
6
|
-
|
|
7
|
-
connect
|
|
9
|
+
@functools.wraps(session.Session)
|
|
10
|
+
def connect(*args, **kwargs): # noqa: D103
|
|
11
|
+
return session.Session(*args, **kwargs)
|
vastdb/bench/__init__.py
ADDED
|
File without changes
|
|
vastdb/bench/test_perf.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
import pyarrow as pa
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from vastdb import util
|
|
8
|
+
from vastdb.table import ImportConfig, QueryConfig
|
|
9
|
+
|
|
10
|
+
log = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@pytest.mark.benchmark
|
|
14
|
+
def test_bench(session, clean_bucket_name, parquets_path, crater_path):
|
|
15
|
+
files = [str(parquets_path / f) for f in (parquets_path.glob('**/*.pq'))]
|
|
16
|
+
|
|
17
|
+
with session.transaction() as tx:
|
|
18
|
+
b = tx.bucket(clean_bucket_name)
|
|
19
|
+
s = b.create_schema('s1')
|
|
20
|
+
t = util.create_table_from_files(s, 't1', files, config=ImportConfig(import_concurrency=8))
|
|
21
|
+
config = QueryConfig(num_splits=8, num_sub_splits=4)
|
|
22
|
+
s = time.time()
|
|
23
|
+
pa_table = pa.Table.from_batches(t.select(columns=['sid'], predicate=t['sid'] == 10033007, config=config))
|
|
24
|
+
e = time.time()
|
|
25
|
+
log.info("'SELECT sid from TABLE WHERE sid = 10033007' returned in %s seconds.", e - s)
|
|
26
|
+
if crater_path:
|
|
27
|
+
with open(f'{crater_path}/bench_results', 'a') as f:
|
|
28
|
+
f.write(f"'SELECT sid FROM TABLE WHERE sid = 10033007' returned in {e - s} seconds")
|
|
29
|
+
assert pa_table.num_rows == 255_075
|
vastdb/bucket.py
CHANGED
|
@@ -4,10 +4,14 @@ VAST S3 buckets can be used to create Database schemas and tables.
|
|
|
4
4
|
It is possible to list and access VAST snapshots generated over a bucket.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import logging
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import TYPE_CHECKING, List, Optional
|
|
10
|
+
|
|
7
11
|
from . import errors, schema, transaction
|
|
8
12
|
|
|
9
|
-
|
|
10
|
-
import logging
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from .schema import Schema
|
|
11
15
|
|
|
12
16
|
log = logging.getLogger(__name__)
|
|
13
17
|
|
|
@@ -27,30 +31,38 @@ class Bucket:
|
|
|
27
31
|
name: str
|
|
28
32
|
tx: "transaction.Transaction"
|
|
29
33
|
|
|
30
|
-
def create_schema(self, path: str) -> "
|
|
34
|
+
def create_schema(self, path: str, fail_if_exists=True) -> "Schema":
|
|
31
35
|
"""Create a new schema (a container of tables) under this bucket."""
|
|
36
|
+
if current := self.schema(path, fail_if_missing=False):
|
|
37
|
+
if fail_if_exists:
|
|
38
|
+
raise errors.SchemaExists(self.name, path)
|
|
39
|
+
else:
|
|
40
|
+
return current
|
|
32
41
|
self.tx._rpc.api.create_schema(self.name, path, txid=self.tx.txid)
|
|
33
42
|
log.info("Created schema: %s", path)
|
|
34
|
-
return self.schema(path)
|
|
43
|
+
return self.schema(path) # type: ignore[return-value]
|
|
35
44
|
|
|
36
|
-
def schema(self, path: str) -> "
|
|
45
|
+
def schema(self, path: str, fail_if_missing=True) -> Optional["Schema"]:
|
|
37
46
|
"""Get a specific schema (a container of tables) under this bucket."""
|
|
38
47
|
s = self.schemas(path)
|
|
39
48
|
log.debug("schema: %s", s)
|
|
40
49
|
if not s:
|
|
41
|
-
|
|
50
|
+
if fail_if_missing:
|
|
51
|
+
raise errors.MissingSchema(self.name, path)
|
|
52
|
+
else:
|
|
53
|
+
return None
|
|
42
54
|
assert len(s) == 1, f"Expected to receive only a single schema, but got: {len(s)}. ({s})"
|
|
43
55
|
log.debug("Found schema: %s", s[0].name)
|
|
44
56
|
return s[0]
|
|
45
57
|
|
|
46
|
-
def schemas(self, name: str = None) -> ["
|
|
58
|
+
def schemas(self, name: Optional[str] = None) -> List["Schema"]:
|
|
47
59
|
"""List bucket's schemas."""
|
|
48
60
|
schemas = []
|
|
49
61
|
next_key = 0
|
|
50
62
|
exact_match = bool(name)
|
|
51
63
|
log.debug("list schemas param: schema=%s, exact_match=%s", name, exact_match)
|
|
52
64
|
while True:
|
|
53
|
-
|
|
65
|
+
_bucket_name, curr_schemas, next_key, is_truncated, _ = \
|
|
54
66
|
self.tx._rpc.api.list_schemas(bucket=self.name, next_key=next_key, txid=self.tx.txid,
|
|
55
67
|
name_prefix=name, exact_match=exact_match)
|
|
56
68
|
if not curr_schemas:
|
|
@@ -61,7 +73,7 @@ class Bucket:
|
|
|
61
73
|
|
|
62
74
|
return [schema.Schema(name=name, bucket=self) for name, *_ in schemas]
|
|
63
75
|
|
|
64
|
-
def snapshots(self) -> [Snapshot]:
|
|
76
|
+
def snapshots(self) -> List[Snapshot]:
|
|
65
77
|
"""List bucket's snapshots."""
|
|
66
78
|
snapshots = []
|
|
67
79
|
next_key = 0
|
|
vastdb/{tests/conftest.py → conftest.py}
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
|
-
import
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
2
3
|
|
|
3
|
-
import pytest
|
|
4
4
|
import boto3
|
|
5
|
-
import
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
import vastdb
|
|
6
8
|
|
|
7
9
|
|
|
8
10
|
def pytest_addoption(parser):
|
|
9
|
-
parser.addoption("--tabular-bucket-name", help="Name of the S3 bucket with Tabular enabled", default
|
|
10
|
-
parser.addoption("--tabular-access-key", help="Access key with Tabular permissions (AWS_ACCESS_KEY_ID)", default
|
|
11
|
-
parser.addoption("--tabular-secret-key", help="Secret key with Tabular permissions (AWS_SECRET_ACCESS_KEY)"
|
|
12
|
-
parser.addoption("--tabular-endpoint-url", help="Tabular server endpoint", default
|
|
11
|
+
parser.addoption("--tabular-bucket-name", help="Name of the S3 bucket with Tabular enabled", default="vastdb")
|
|
12
|
+
parser.addoption("--tabular-access-key", help="Access key with Tabular permissions (AWS_ACCESS_KEY_ID)", default=os.environ.get("AWS_ACCESS_KEY_ID", None))
|
|
13
|
+
parser.addoption("--tabular-secret-key", help="Secret key with Tabular permissions (AWS_SECRET_ACCESS_KEY)", default=os.environ.get("AWS_SECRET_ACCESS_KEY", None))
|
|
14
|
+
parser.addoption("--tabular-endpoint-url", help="Tabular server endpoint", default="http://localhost:9090")
|
|
15
|
+
parser.addoption("--data-path", help="Data files location", default=None)
|
|
16
|
+
parser.addoption("--crater-path", help="Save benchmark results in a dedicated location", default=None)
|
|
13
17
|
|
|
14
18
|
|
|
15
19
|
@pytest.fixture(scope="session")
|
|
@@ -44,3 +48,13 @@ def s3(request):
|
|
|
44
48
|
aws_access_key_id=request.config.getoption("--tabular-access-key"),
|
|
45
49
|
aws_secret_access_key=request.config.getoption("--tabular-secret-key"),
|
|
46
50
|
endpoint_url=request.config.getoption("--tabular-endpoint-url"))
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@pytest.fixture(scope="function")
|
|
54
|
+
def parquets_path(request):
|
|
55
|
+
return Path(request.config.getoption("--data-path"))
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@pytest.fixture(scope="function")
|
|
59
|
+
def crater_path(request):
|
|
60
|
+
return request.config.getoption("--crater-path")
|
vastdb/errors.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
import requests
|
|
3
2
|
import xml.etree.ElementTree
|
|
4
|
-
|
|
5
|
-
from enum import Enum
|
|
6
3
|
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class HttpStatus(Enum):
|
|
@@ -26,6 +26,7 @@ log = logging.getLogger(__name__)
|
|
|
26
26
|
class HttpError(Exception):
|
|
27
27
|
code: str
|
|
28
28
|
message: str
|
|
29
|
+
method: str
|
|
29
30
|
url: str
|
|
30
31
|
status: int # HTTP status
|
|
31
32
|
headers: requests.structures.CaseInsensitiveDict # HTTP response headers
|
|
@@ -88,6 +89,10 @@ class Missing(Exception):
|
|
|
88
89
|
pass
|
|
89
90
|
|
|
90
91
|
|
|
92
|
+
class MissingTransaction(Missing):
|
|
93
|
+
pass
|
|
94
|
+
|
|
95
|
+
|
|
91
96
|
@dataclass
|
|
92
97
|
class MissingBucket(Missing):
|
|
93
98
|
bucket: str
|
|
@@ -114,6 +119,23 @@ class MissingProjection(Missing):
|
|
|
114
119
|
projection: str
|
|
115
120
|
|
|
116
121
|
|
|
122
|
+
class Exists(Exception):
|
|
123
|
+
pass
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@dataclass
|
|
127
|
+
class SchemaExists(Exists):
|
|
128
|
+
bucket: str
|
|
129
|
+
schema: str
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@dataclass
|
|
133
|
+
class TableExists(Exists):
|
|
134
|
+
bucket: str
|
|
135
|
+
schema: str
|
|
136
|
+
table: str
|
|
137
|
+
|
|
138
|
+
|
|
117
139
|
ERROR_TYPES_MAP = {
|
|
118
140
|
HttpStatus.BAD_REQUEST: BadRequest,
|
|
119
141
|
HttpStatus.FOBIDDEN: Forbidden,
|
|
@@ -133,21 +155,22 @@ def from_response(res: requests.Response):
|
|
|
133
155
|
|
|
134
156
|
log.debug("response: url='%s', code=%s, headers=%s, body='%s'", res.request.url, res.status_code, res.headers, res.text)
|
|
135
157
|
# try to parse S3 XML response for the error details:
|
|
136
|
-
|
|
137
|
-
|
|
158
|
+
code_str = None
|
|
159
|
+
message_str = None
|
|
138
160
|
if res.text:
|
|
139
161
|
try:
|
|
140
162
|
root = xml.etree.ElementTree.fromstring(res.text)
|
|
141
163
|
code = root.find('Code')
|
|
142
|
-
|
|
164
|
+
code_str = code.text if code is not None else None
|
|
143
165
|
message = root.find('Message')
|
|
144
|
-
|
|
166
|
+
message_str = message.text if message is not None else None
|
|
145
167
|
except xml.etree.ElementTree.ParseError:
|
|
146
168
|
log.debug("invalid XML: %r", res.text)
|
|
147
169
|
|
|
148
170
|
kwargs = dict(
|
|
149
|
-
code=
|
|
150
|
-
message=
|
|
171
|
+
code=code_str,
|
|
172
|
+
message=message_str,
|
|
173
|
+
method=res.request.method,
|
|
151
174
|
url=res.request.url,
|
|
152
175
|
status=res.status_code,
|
|
153
176
|
headers=res.headers,
|