vastdb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
vastdb/__init__.py CHANGED
@@ -1,7 +1,11 @@
1
1
  """VAST Database Python SDK."""
2
2
 
3
+ import functools
4
+
3
5
  from . import session
4
6
 
7
+
5
8
  # A helper function, useful as a short-hand for Session c-tor: `session = vastdb.connect(...)`
6
- connect = session.Session
7
- connect.__name__ = 'connect'
9
+ @functools.wraps(session.Session)
10
+ def connect(*args, **kwargs): # noqa: D103
11
+ return session.Session(*args, **kwargs)
File without changes
@@ -0,0 +1,29 @@
1
+ import logging
2
+ import time
3
+
4
+ import pyarrow as pa
5
+ import pytest
6
+
7
+ from vastdb import util
8
+ from vastdb.table import ImportConfig, QueryConfig
9
+
10
+ log = logging.getLogger(__name__)
11
+
12
+
13
+ @pytest.mark.benchmark
14
+ def test_bench(session, clean_bucket_name, parquets_path, crater_path):
15
+ files = [str(parquets_path / f) for f in (parquets_path.glob('**/*.pq'))]
16
+
17
+ with session.transaction() as tx:
18
+ b = tx.bucket(clean_bucket_name)
19
+ s = b.create_schema('s1')
20
+ t = util.create_table_from_files(s, 't1', files, config=ImportConfig(import_concurrency=8))
21
+ config = QueryConfig(num_splits=8, num_sub_splits=4)
22
+ s = time.time()
23
+ pa_table = pa.Table.from_batches(t.select(columns=['sid'], predicate=t['sid'] == 10033007, config=config))
24
+ e = time.time()
25
+ log.info("'SELECT sid from TABLE WHERE sid = 10033007' returned in %s seconds.", e - s)
26
+ if crater_path:
27
+ with open(f'{crater_path}/bench_results', 'a') as f:
28
+ f.write(f"'SELECT sid FROM TABLE WHERE sid = 10033007' returned in {e - s} seconds")
29
+ assert pa_table.num_rows == 255_075
vastdb/bucket.py CHANGED
@@ -4,10 +4,14 @@ VAST S3 buckets can be used to create Database schemas and tables.
4
4
  It is possible to list and access VAST snapshots generated over a bucket.
5
5
  """
6
6
 
7
+ import logging
8
+ from dataclasses import dataclass
9
+ from typing import TYPE_CHECKING, List, Optional
10
+
7
11
  from . import errors, schema, transaction
8
12
 
9
- from dataclasses import dataclass
10
- import logging
13
+ if TYPE_CHECKING:
14
+ from .schema import Schema
11
15
 
12
16
  log = logging.getLogger(__name__)
13
17
 
@@ -27,30 +31,38 @@ class Bucket:
27
31
  name: str
28
32
  tx: "transaction.Transaction"
29
33
 
30
- def create_schema(self, path: str) -> "schema.Schema":
34
+ def create_schema(self, path: str, fail_if_exists=True) -> "Schema":
31
35
  """Create a new schema (a container of tables) under this bucket."""
36
+ if current := self.schema(path, fail_if_missing=False):
37
+ if fail_if_exists:
38
+ raise errors.SchemaExists(self.name, path)
39
+ else:
40
+ return current
32
41
  self.tx._rpc.api.create_schema(self.name, path, txid=self.tx.txid)
33
42
  log.info("Created schema: %s", path)
34
- return self.schema(path)
43
+ return self.schema(path) # type: ignore[return-value]
35
44
 
36
- def schema(self, path: str) -> "schema.Schema":
45
+ def schema(self, path: str, fail_if_missing=True) -> Optional["Schema"]:
37
46
  """Get a specific schema (a container of tables) under this bucket."""
38
47
  s = self.schemas(path)
39
48
  log.debug("schema: %s", s)
40
49
  if not s:
41
- raise errors.MissingSchema(self.name, path)
50
+ if fail_if_missing:
51
+ raise errors.MissingSchema(self.name, path)
52
+ else:
53
+ return None
42
54
  assert len(s) == 1, f"Expected to receive only a single schema, but got: {len(s)}. ({s})"
43
55
  log.debug("Found schema: %s", s[0].name)
44
56
  return s[0]
45
57
 
46
- def schemas(self, name: str = None) -> ["schema.Schema"]:
58
+ def schemas(self, name: Optional[str] = None) -> List["Schema"]:
47
59
  """List bucket's schemas."""
48
60
  schemas = []
49
61
  next_key = 0
50
62
  exact_match = bool(name)
51
63
  log.debug("list schemas param: schema=%s, exact_match=%s", name, exact_match)
52
64
  while True:
53
- bucket_name, curr_schemas, next_key, is_truncated, _ = \
65
+ _bucket_name, curr_schemas, next_key, is_truncated, _ = \
54
66
  self.tx._rpc.api.list_schemas(bucket=self.name, next_key=next_key, txid=self.tx.txid,
55
67
  name_prefix=name, exact_match=exact_match)
56
68
  if not curr_schemas:
@@ -61,7 +73,7 @@ class Bucket:
61
73
 
62
74
  return [schema.Schema(name=name, bucket=self) for name, *_ in schemas]
63
75
 
64
- def snapshots(self) -> [Snapshot]:
76
+ def snapshots(self) -> List[Snapshot]:
65
77
  """List bucket's snapshots."""
66
78
  snapshots = []
67
79
  next_key = 0
@@ -1,15 +1,19 @@
1
- import vastdb
1
+ import os
2
+ from pathlib import Path
2
3
 
3
- import pytest
4
4
  import boto3
5
- import os
5
+ import pytest
6
+
7
+ import vastdb
6
8
 
7
9
 
8
10
  def pytest_addoption(parser):
9
- parser.addoption("--tabular-bucket-name", help="Name of the S3 bucket with Tabular enabled", default = "vastdb")
10
- parser.addoption("--tabular-access-key", help="Access key with Tabular permissions (AWS_ACCESS_KEY_ID)", default = os.environ.get("AWS_ACCESS_KEY_ID", None))
11
- parser.addoption("--tabular-secret-key", help="Secret key with Tabular permissions (AWS_SECRET_ACCESS_KEY)" , default = os.environ.get("AWS_SECRET_ACCESS_KEY", None))
12
- parser.addoption("--tabular-endpoint-url", help="Tabular server endpoint", default = "http://localhost:9090")
11
+ parser.addoption("--tabular-bucket-name", help="Name of the S3 bucket with Tabular enabled", default="vastdb")
12
+ parser.addoption("--tabular-access-key", help="Access key with Tabular permissions (AWS_ACCESS_KEY_ID)", default=os.environ.get("AWS_ACCESS_KEY_ID", None))
13
+ parser.addoption("--tabular-secret-key", help="Secret key with Tabular permissions (AWS_SECRET_ACCESS_KEY)", default=os.environ.get("AWS_SECRET_ACCESS_KEY", None))
14
+ parser.addoption("--tabular-endpoint-url", help="Tabular server endpoint", default="http://localhost:9090")
15
+ parser.addoption("--data-path", help="Data files location", default=None)
16
+ parser.addoption("--crater-path", help="Save benchmark results in a dedicated location", default=None)
13
17
 
14
18
 
15
19
  @pytest.fixture(scope="session")
@@ -44,3 +48,13 @@ def s3(request):
44
48
  aws_access_key_id=request.config.getoption("--tabular-access-key"),
45
49
  aws_secret_access_key=request.config.getoption("--tabular-secret-key"),
46
50
  endpoint_url=request.config.getoption("--tabular-endpoint-url"))
51
+
52
+
53
+ @pytest.fixture(scope="function")
54
+ def parquets_path(request):
55
+ return Path(request.config.getoption("--data-path"))
56
+
57
+
58
+ @pytest.fixture(scope="function")
59
+ def crater_path(request):
60
+ return request.config.getoption("--crater-path")
vastdb/errors.py CHANGED
@@ -1,9 +1,9 @@
1
1
  import logging
2
- import requests
3
2
  import xml.etree.ElementTree
4
-
5
- from enum import Enum
6
3
  from dataclasses import dataclass
4
+ from enum import Enum
5
+
6
+ import requests
7
7
 
8
8
 
9
9
  class HttpStatus(Enum):
@@ -26,6 +26,7 @@ log = logging.getLogger(__name__)
26
26
  class HttpError(Exception):
27
27
  code: str
28
28
  message: str
29
+ method: str
29
30
  url: str
30
31
  status: int # HTTP status
31
32
  headers: requests.structures.CaseInsensitiveDict # HTTP response headers
@@ -88,6 +89,10 @@ class Missing(Exception):
88
89
  pass
89
90
 
90
91
 
92
+ class MissingTransaction(Missing):
93
+ pass
94
+
95
+
91
96
  @dataclass
92
97
  class MissingBucket(Missing):
93
98
  bucket: str
@@ -114,6 +119,23 @@ class MissingProjection(Missing):
114
119
  projection: str
115
120
 
116
121
 
122
+ class Exists(Exception):
123
+ pass
124
+
125
+
126
+ @dataclass
127
+ class SchemaExists(Exists):
128
+ bucket: str
129
+ schema: str
130
+
131
+
132
+ @dataclass
133
+ class TableExists(Exists):
134
+ bucket: str
135
+ schema: str
136
+ table: str
137
+
138
+
117
139
  ERROR_TYPES_MAP = {
118
140
  HttpStatus.BAD_REQUEST: BadRequest,
119
141
  HttpStatus.FOBIDDEN: Forbidden,
@@ -133,21 +155,22 @@ def from_response(res: requests.Response):
133
155
 
134
156
  log.debug("response: url='%s', code=%s, headers=%s, body='%s'", res.request.url, res.status_code, res.headers, res.text)
135
157
  # try to parse S3 XML response for the error details:
136
- code = None
137
- message = None
158
+ code_str = None
159
+ message_str = None
138
160
  if res.text:
139
161
  try:
140
162
  root = xml.etree.ElementTree.fromstring(res.text)
141
163
  code = root.find('Code')
142
- code = code.text if code is not None else None
164
+ code_str = code.text if code is not None else None
143
165
  message = root.find('Message')
144
- message = message.text if message is not None else None
166
+ message_str = message.text if message is not None else None
145
167
  except xml.etree.ElementTree.ParseError:
146
168
  log.debug("invalid XML: %r", res.text)
147
169
 
148
170
  kwargs = dict(
149
- code=code,
150
- message=message,
171
+ code=code_str,
172
+ message=message_str,
173
+ method=res.request.method,
151
174
  url=res.request.url,
152
175
  status=res.status_code,
153
176
  headers=res.headers,