vastdb 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/__init__.py +6 -2
- vastdb/bench/test_perf.py +3 -3
- vastdb/bucket.py +29 -15
- vastdb/errors.py +40 -7
- vastdb/internal_commands.py +194 -233
- vastdb/schema.py +11 -6
- vastdb/session.py +16 -1
- vastdb/table.py +181 -77
- vastdb/tests/test_duckdb.py +61 -0
- vastdb/tests/test_imports.py +13 -1
- vastdb/tests/test_projections.py +1 -0
- vastdb/tests/test_sanity.py +2 -2
- vastdb/tests/test_schemas.py +3 -3
- vastdb/tests/test_tables.py +60 -50
- vastdb/tests/test_util.py +39 -0
- vastdb/tests/util.py +1 -4
- vastdb/transaction.py +32 -6
- vastdb/util.py +42 -6
- {vastdb-0.1.1.dist-info → vastdb-0.1.3.dist-info}/METADATA +2 -5
- {vastdb-0.1.1.dist-info → vastdb-0.1.3.dist-info}/RECORD +23 -21
- {vastdb-0.1.1.dist-info → vastdb-0.1.3.dist-info}/WHEEL +1 -1
- {vastdb-0.1.1.dist-info → vastdb-0.1.3.dist-info}/LICENSE +0 -0
- {vastdb-0.1.1.dist-info → vastdb-0.1.3.dist-info}/top_level.txt +0 -0
vastdb/__init__.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
"""VAST Database Python SDK."""
|
|
2
2
|
|
|
3
|
+
import functools
|
|
4
|
+
|
|
3
5
|
from . import session
|
|
4
6
|
|
|
7
|
+
|
|
5
8
|
# A helper function, useful as a short-hand for Session c-tor: `session = vastdb.connect(...)`
|
|
6
|
-
|
|
7
|
-
connect
|
|
9
|
+
@functools.wraps(session.Session)
|
|
10
|
+
def connect(*args, **kwargs): # noqa: D103
|
|
11
|
+
return session.Session(*args, **kwargs)
|
vastdb/bench/test_perf.py
CHANGED
|
@@ -12,7 +12,7 @@ log = logging.getLogger(__name__)
|
|
|
12
12
|
|
|
13
13
|
@pytest.mark.benchmark
|
|
14
14
|
def test_bench(session, clean_bucket_name, parquets_path, crater_path):
|
|
15
|
-
files = [str(parquets_path/f) for f in (parquets_path.glob('**/*.pq'))]
|
|
15
|
+
files = [str(parquets_path / f) for f in (parquets_path.glob('**/*.pq'))]
|
|
16
16
|
|
|
17
17
|
with session.transaction() as tx:
|
|
18
18
|
b = tx.bucket(clean_bucket_name)
|
|
@@ -22,8 +22,8 @@ def test_bench(session, clean_bucket_name, parquets_path, crater_path):
|
|
|
22
22
|
s = time.time()
|
|
23
23
|
pa_table = pa.Table.from_batches(t.select(columns=['sid'], predicate=t['sid'] == 10033007, config=config))
|
|
24
24
|
e = time.time()
|
|
25
|
-
log.info("'SELECT sid from TABLE WHERE sid = 10033007' returned in %s seconds.", e-s)
|
|
25
|
+
log.info("'SELECT sid from TABLE WHERE sid = 10033007' returned in %s seconds.", e - s)
|
|
26
26
|
if crater_path:
|
|
27
27
|
with open(f'{crater_path}/bench_results', 'a') as f:
|
|
28
|
-
f.write(f"'SELECT sid FROM TABLE WHERE sid = 10033007' returned in {e-s} seconds")
|
|
28
|
+
f.write(f"'SELECT sid FROM TABLE WHERE sid = 10033007' returned in {e - s} seconds")
|
|
29
29
|
assert pa_table.num_rows == 255_075
|
vastdb/bucket.py
CHANGED
|
@@ -6,18 +6,14 @@ It is possible to list and access VAST snapshots generated over a bucket.
|
|
|
6
6
|
|
|
7
7
|
import logging
|
|
8
8
|
from dataclasses import dataclass
|
|
9
|
+
from typing import TYPE_CHECKING, List, Optional
|
|
9
10
|
|
|
10
11
|
from . import errors, schema, transaction
|
|
11
12
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
@dataclass
|
|
16
|
-
class Snapshot:
|
|
17
|
-
"""VAST bucket-level snapshot."""
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from .schema import Schema
|
|
18
15
|
|
|
19
|
-
|
|
20
|
-
bucket: "Bucket"
|
|
16
|
+
log = logging.getLogger(__name__)
|
|
21
17
|
|
|
22
18
|
|
|
23
19
|
@dataclass
|
|
@@ -27,7 +23,7 @@ class Bucket:
|
|
|
27
23
|
name: str
|
|
28
24
|
tx: "transaction.Transaction"
|
|
29
25
|
|
|
30
|
-
def create_schema(self, path: str, fail_if_exists=True) -> "
|
|
26
|
+
def create_schema(self, path: str, fail_if_exists=True) -> "Schema":
|
|
31
27
|
"""Create a new schema (a container of tables) under this bucket."""
|
|
32
28
|
if current := self.schema(path, fail_if_missing=False):
|
|
33
29
|
if fail_if_exists:
|
|
@@ -36,9 +32,9 @@ class Bucket:
|
|
|
36
32
|
return current
|
|
37
33
|
self.tx._rpc.api.create_schema(self.name, path, txid=self.tx.txid)
|
|
38
34
|
log.info("Created schema: %s", path)
|
|
39
|
-
return self.schema(path)
|
|
35
|
+
return self.schema(path) # type: ignore[return-value]
|
|
40
36
|
|
|
41
|
-
def schema(self, path: str, fail_if_missing=True) -> "
|
|
37
|
+
def schema(self, path: str, fail_if_missing=True) -> Optional["Schema"]:
|
|
42
38
|
"""Get a specific schema (a container of tables) under this bucket."""
|
|
43
39
|
s = self.schemas(path)
|
|
44
40
|
log.debug("schema: %s", s)
|
|
@@ -51,14 +47,14 @@ class Bucket:
|
|
|
51
47
|
log.debug("Found schema: %s", s[0].name)
|
|
52
48
|
return s[0]
|
|
53
49
|
|
|
54
|
-
def schemas(self, name: str = None) -> ["
|
|
50
|
+
def schemas(self, name: Optional[str] = None) -> List["Schema"]:
|
|
55
51
|
"""List bucket's schemas."""
|
|
56
52
|
schemas = []
|
|
57
53
|
next_key = 0
|
|
58
54
|
exact_match = bool(name)
|
|
59
55
|
log.debug("list schemas param: schema=%s, exact_match=%s", name, exact_match)
|
|
60
56
|
while True:
|
|
61
|
-
|
|
57
|
+
_bucket_name, curr_schemas, next_key, is_truncated, _ = \
|
|
62
58
|
self.tx._rpc.api.list_schemas(bucket=self.name, next_key=next_key, txid=self.tx.txid,
|
|
63
59
|
name_prefix=name, exact_match=exact_match)
|
|
64
60
|
if not curr_schemas:
|
|
@@ -69,7 +65,22 @@ class Bucket:
|
|
|
69
65
|
|
|
70
66
|
return [schema.Schema(name=name, bucket=self) for name, *_ in schemas]
|
|
71
67
|
|
|
72
|
-
def
|
|
68
|
+
def snapshot(self, name, fail_if_missing=True) -> Optional["Bucket"]:
|
|
69
|
+
"""Get snapshot by name (if exists)."""
|
|
70
|
+
snapshots, _is_truncated, _next_key = \
|
|
71
|
+
self.tx._rpc.api.list_snapshots(bucket=self.name, name_prefix=name, max_keys=1)
|
|
72
|
+
|
|
73
|
+
expected_name = f".snapshot/{name}"
|
|
74
|
+
exists = snapshots and snapshots[0] == expected_name + "/"
|
|
75
|
+
if not exists:
|
|
76
|
+
if fail_if_missing:
|
|
77
|
+
raise errors.MissingSnapshot(self.name, expected_name)
|
|
78
|
+
else:
|
|
79
|
+
return None
|
|
80
|
+
|
|
81
|
+
return Bucket(name=f'{self.name}/{expected_name}', tx=self.tx)
|
|
82
|
+
|
|
83
|
+
def snapshots(self) -> List["Bucket"]:
|
|
73
84
|
"""List bucket's snapshots."""
|
|
74
85
|
snapshots = []
|
|
75
86
|
next_key = 0
|
|
@@ -82,4 +93,7 @@ class Bucket:
|
|
|
82
93
|
if not is_truncated:
|
|
83
94
|
break
|
|
84
95
|
|
|
85
|
-
return [
|
|
96
|
+
return [
|
|
97
|
+
Bucket(name=f'{self.name}/{snapshot.strip("/")}', tx=self.tx)
|
|
98
|
+
for snapshot in snapshots
|
|
99
|
+
]
|
vastdb/errors.py
CHANGED
|
@@ -26,6 +26,7 @@ log = logging.getLogger(__name__)
|
|
|
26
26
|
class HttpError(Exception):
|
|
27
27
|
code: str
|
|
28
28
|
message: str
|
|
29
|
+
method: str
|
|
29
30
|
url: str
|
|
30
31
|
status: int # HTTP status
|
|
31
32
|
headers: requests.structures.CaseInsensitiveDict # HTTP response headers
|
|
@@ -84,15 +85,33 @@ class InvalidArgument(Exception):
|
|
|
84
85
|
pass
|
|
85
86
|
|
|
86
87
|
|
|
88
|
+
class TooWideRow(InvalidArgument):
|
|
89
|
+
pass
|
|
90
|
+
|
|
91
|
+
|
|
87
92
|
class Missing(Exception):
|
|
88
93
|
pass
|
|
89
94
|
|
|
90
95
|
|
|
96
|
+
class MissingTransaction(Missing):
|
|
97
|
+
pass
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class NotSupported(Exception):
|
|
101
|
+
pass
|
|
102
|
+
|
|
103
|
+
|
|
91
104
|
@dataclass
|
|
92
105
|
class MissingBucket(Missing):
|
|
93
106
|
bucket: str
|
|
94
107
|
|
|
95
108
|
|
|
109
|
+
@dataclass
|
|
110
|
+
class MissingSnapshot(Missing):
|
|
111
|
+
bucket: str
|
|
112
|
+
snapshot: str
|
|
113
|
+
|
|
114
|
+
|
|
96
115
|
@dataclass
|
|
97
116
|
class MissingSchema(Missing):
|
|
98
117
|
bucket: str
|
|
@@ -131,6 +150,19 @@ class TableExists(Exists):
|
|
|
131
150
|
table: str
|
|
132
151
|
|
|
133
152
|
|
|
153
|
+
@dataclass
|
|
154
|
+
class NotSupportedCommand(NotSupported):
|
|
155
|
+
bucket: str
|
|
156
|
+
schema: str
|
|
157
|
+
table: str
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@dataclass
|
|
161
|
+
class NotSupportedVersion(NotSupported):
|
|
162
|
+
err_msg: str
|
|
163
|
+
version: str
|
|
164
|
+
|
|
165
|
+
|
|
134
166
|
ERROR_TYPES_MAP = {
|
|
135
167
|
HttpStatus.BAD_REQUEST: BadRequest,
|
|
136
168
|
HttpStatus.FOBIDDEN: Forbidden,
|
|
@@ -150,21 +182,22 @@ def from_response(res: requests.Response):
|
|
|
150
182
|
|
|
151
183
|
log.debug("response: url='%s', code=%s, headers=%s, body='%s'", res.request.url, res.status_code, res.headers, res.text)
|
|
152
184
|
# try to parse S3 XML response for the error details:
|
|
153
|
-
|
|
154
|
-
|
|
185
|
+
code_str = None
|
|
186
|
+
message_str = None
|
|
155
187
|
if res.text:
|
|
156
188
|
try:
|
|
157
189
|
root = xml.etree.ElementTree.fromstring(res.text)
|
|
158
190
|
code = root.find('Code')
|
|
159
|
-
|
|
191
|
+
code_str = code.text if code is not None else None
|
|
160
192
|
message = root.find('Message')
|
|
161
|
-
|
|
193
|
+
message_str = message.text if message is not None else None
|
|
162
194
|
except xml.etree.ElementTree.ParseError:
|
|
163
195
|
log.debug("invalid XML: %r", res.text)
|
|
164
196
|
|
|
165
197
|
kwargs = dict(
|
|
166
|
-
code=
|
|
167
|
-
message=
|
|
198
|
+
code=code_str,
|
|
199
|
+
message=message_str,
|
|
200
|
+
method=res.request.method,
|
|
168
201
|
url=res.request.url,
|
|
169
202
|
status=res.status_code,
|
|
170
203
|
headers=res.headers,
|
|
@@ -172,4 +205,4 @@ def from_response(res: requests.Response):
|
|
|
172
205
|
log.warning("RPC failed: %s", kwargs)
|
|
173
206
|
status = HttpStatus(res.status_code)
|
|
174
207
|
error_type = ERROR_TYPES_MAP.get(status, UnexpectedError)
|
|
175
|
-
|
|
208
|
+
return error_type(**kwargs)
|