vastdb 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/_internal.py +1 -1
- vastdb/bench/test_sample.py +196 -0
- vastdb/conftest.py +28 -6
- vastdb/errors.py +6 -0
- vastdb/schema.py +2 -1
- vastdb/table.py +10 -2
- vastdb/tests/metrics.py +42 -0
- vastdb/tests/test_tables.py +40 -0
- vastdb/util.py +13 -1
- vastdb/vast_tests/test_ha.py +1 -1
- {vastdb-0.1.8.dist-info → vastdb-0.1.10.dist-info}/METADATA +1 -1
- {vastdb-0.1.8.dist-info → vastdb-0.1.10.dist-info}/RECORD +15 -13
- {vastdb-0.1.8.dist-info → vastdb-0.1.10.dist-info}/WHEEL +1 -1
- {vastdb-0.1.8.dist-info → vastdb-0.1.10.dist-info}/LICENSE +0 -0
- {vastdb-0.1.8.dist-info → vastdb-0.1.10.dist-info}/top_level.txt +0 -0
vastdb/_internal.py
CHANGED
@@ -1807,7 +1807,7 @@ def _iter_query_data_response_columns(fileobj, stream_ids=None):
             batches.append(batch)
         except StopIteration:  # we got an end-of-stream IPC message for a given stream ID
             reader, batches = readers.pop(stream_id)  # end of column
-            table = pa.Table.from_batches(batches)  # concatenate all column chunks (as a single)
+            table = pa.Table.from_batches(batches=batches, schema=reader.schema)  # concatenate all column chunks (as a single)
             _logger.debug("stream_id=%d rows=%d column=%s", stream_id, len(table), table)
             yield (stream_id, next_row_id, table)
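Passing the reader's schema explicitly matters when a stream ends with zero record batches, since PyArrow cannot infer a schema from an empty list. A minimal sketch of the failure mode this change avoids (the single-column schema is illustrative):

import pyarrow as pa

schema = pa.schema([pa.field("c0", pa.float32())])
batches = []  # a column stream may legitimately yield zero batches

# Schema inference needs at least one batch, so this would raise
# "Must pass schema, or at least one RecordBatch":
# pa.Table.from_batches(batches)

# With an explicit schema, the empty case yields an empty typed table:
table = pa.Table.from_batches(batches=batches, schema=schema)
assert table.num_rows == 0 and table.schema == schema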
vastdb/bench/test_sample.py
ADDED
@@ -0,0 +1,196 @@
+#!/usr/bin/env python3
+
+import functools
+import itertools
+import logging
+import os
+import random
+import threading
+import time
+from concurrent.futures import ProcessPoolExecutor, as_completed
+
+import numpy as np
+import pyarrow as pa
+
+import vastdb.errors
+from vastdb.table import INTERNAL_ROW_ID
+from vastdb.tests import metrics
+
+logging.basicConfig(
+    level="INFO",
+    format="%(asctime)s %(levelname)-10s %(process)d/%(thread)d %(filename)s:%(lineno)d %(message)s")
+
+log = logging.getLogger()
+
+log.info("Python SDK version: %s", vastdb.util.version())
+
+NUM_COLUMNS = 10_000
+COLUMNS_BATCH = 10
+
+NUM_ROW_GROUPS = 100
+ROW_GROUP_SIZE = 100_000
+
+
+INTERNAL_ROWID_FIELD = pa.field(INTERNAL_ROW_ID, pa.uint64())  # used for UPDATE
+EXTERNAL_ROWID_FIELD = pa.field("vastdb_rowid", pa.int64())  # used for INSERT & SELECT
+
+SCHEMA = "perf"
+TABLE = "sample"
+
+SCHEMA_ARROW = pa.schema(
+    [pa.field(f'c{i}', pa.float32()) for i in range(NUM_COLUMNS)]
+)
+
+
+def load_batch(bucket, session_kwargs, offset, limit):
+    log.info('loading into [%d..%d)', offset, limit)
+
+    # Iterate over all row-groups in this file
+    rowids_range = range(offset, limit)
+    rowids = pa.array(rowids_range, INTERNAL_ROWID_FIELD.type)
+
+    session = vastdb.connect(**session_kwargs)
+    metrics_rows = []
+
+    with session.transaction() as tx:
+        table = tx.bucket(bucket).schema(SCHEMA).table(TABLE)
+
+        col = table[EXTERNAL_ROWID_FIELD.name]
+        pred = (col >= rowids_range[0]) & (col <= rowids_range[-1])
+        count = sum(len(rb) for rb in table.select(columns=[], predicate=pred))
+        log.info("%d rows exist at %s", count, rowids_range)
+        if count == len(rowids_range):
+            # skip already loaded rows
+            log.info('skipping [%d..%d)', offset, limit)
+
+        total_nbytes = 0
+        calls = 0
+        t0 = time.time()
+        # Insert/update every chunk of columns in this rowgroup
+        for j in range(0, len(SCHEMA_ARROW), COLUMNS_BATCH):
+            cols_batch = list(SCHEMA_ARROW)[j:j + COLUMNS_BATCH]
+            arrays = [
+                pa.array(np.float32(np.random.uniform(size=[ROW_GROUP_SIZE])))
+                for _ in cols_batch
+            ]
+            chunk = pa.table(data=arrays, schema=pa.schema(cols_batch))
+            nbytes = chunk.get_total_buffer_size()
+            start = time.perf_counter()
+            if j == 0:
+                chunk = chunk.add_column(0, EXTERNAL_ROWID_FIELD, rowids.cast(EXTERNAL_ROWID_FIELD.type))
+                op = 'insert'
+                table.insert(chunk)
+            else:
+                chunk = chunk.add_column(0, INTERNAL_ROWID_FIELD, rowids)
+                op = 'update'
+                table.update(chunk)
+            finish = time.perf_counter()
+
+            metrics_rows.append(metrics.Row(
+                start=start, finish=finish, table_path=table.path, op=op,
+                nbytes=nbytes, rows=len(chunk), cols=len(cols_batch),
+                pid=os.getpid(), tid=threading.get_native_id()))
+
+            total_nbytes += nbytes
+            calls += 1
+            log.debug("%s into %s: %d rows x %d cols, %.3f MB",
+                      op, rowids_range, len(chunk), len(chunk.schema),
+                      chunk.get_total_buffer_size() / 1e6)
+
+        dt = time.time() - t0
+
+        log.info('loaded into [%d..%d): %d rows x %d cols, %.3f MB, %d RPCs, %.3f seconds',
+                 offset, limit, limit - offset, NUM_COLUMNS, total_nbytes / 1e6, calls, dt)
+    return metrics_rows
+
+
+def test_ingest(clean_bucket_name, session_kwargs, tabular_endpoint_urls, num_workers, perf_metrics_db):
+    session = vastdb.connect(**session_kwargs)
+    metrics_table = metrics.Table(perf_metrics_db, "ingest")
+
+    with session.transaction() as tx:
+        b = tx.bucket(clean_bucket_name)
+        try:
+            s = b.schema(SCHEMA)
+        except vastdb.errors.MissingSchema:
+            s = b.create_schema(SCHEMA)
+
+        try:
+            s.table(TABLE)
+        except vastdb.errors.MissingTable:
+            s.create_table(TABLE, pa.schema([EXTERNAL_ROWID_FIELD] + list(SCHEMA_ARROW)))
+
+    ranges = [
+        (i * ROW_GROUP_SIZE, (i + 1) * ROW_GROUP_SIZE)
+        for i in range(NUM_ROW_GROUPS)
+    ]
+
+    with ProcessPoolExecutor(max_workers=num_workers) as executor:
+        futures = [
+            executor.submit(load_batch, clean_bucket_name, session_kwargs | {'endpoint': url}, offset, limit)
+            for (offset, limit), url in zip(ranges, itertools.cycle(tabular_endpoint_urls))
+        ]
+        log.info("spawned %d futures", len(futures))
+        for future in as_completed(futures):
+            metrics_table.insert(future.result())
+
+    with session.transaction() as tx:
+        t = tx.bucket(clean_bucket_name).schema(SCHEMA).table(TABLE)
+        count = sum(len(rb) for rb in t.select([]))
+        log.info("%s has %d rows: %s", t, count, t.stats)
+
+
+def run_query(session_kwargs, i, bucket_name, endpoint_url):
+    num_columns = 2000
+    row_groups_per_query = 10
+
+    config = vastdb.table.QueryConfig(
+        num_sub_splits=1,
+        num_splits=1,
+        limit_rows_per_sub_split=ROW_GROUP_SIZE,
+        num_row_groups_per_sub_split=1)
+
+    row_group_indices = list(range(NUM_ROW_GROUPS))
+    r = random.Random(i)
+    r.shuffle(row_group_indices)
+
+    session = vastdb.connect(**(session_kwargs | {"endpoint": endpoint_url}))
+    with session.transaction() as tx:
+        t = tx.bucket(bucket_name).schema(SCHEMA).table(TABLE)
+
+        fields = list(t.arrow_schema)[1:]
+        r.shuffle(fields)
+        cols = [f.name for f in fields[:num_columns]]
+
+        vastdb_rowid = t['vastdb_rowid']
+        preds = []
+        for offset in range(0, len(row_group_indices), row_groups_per_query):
+            rowid_ranges = (
+                vastdb_rowid.between(j * ROW_GROUP_SIZE, (j + 1) * ROW_GROUP_SIZE - 1)
+                for j in row_group_indices[offset:offset + row_groups_per_query]
+            )
+            pred = functools.reduce((lambda x, y: x | y), rowid_ranges)
+            preds.append(pred)
+
+        for j, pred in enumerate(preds):
+            log.info("%d) starting query #%d on %s", i, j, endpoint_url)
+            t0 = time.time()
+            res = t.select(columns=cols, predicate=pred, config=config)
+            rows = 0
+            data = 0
+            for rb in res:
+                rows += len(rb)
+                data += rb.nbytes
+                dt = time.time() - t0
+                log.info("%d) got query #%d batch %.3f[s], %.3f[GB] %.3f[MB/s], %.3f[Mrows]", i, j, dt, data / 1e9, data / 1e6 / dt, rows / 1e6)

+            dt = time.time() - t0
+            log.info("%d) finished query #%d %.3f[s], %.3f[GB], %.3f[MB/s], %.3f[Mrows]", i, j, dt, data / 1e9, data / 1e6 / dt, rows / 1e6)
+
+
+def test_scan(test_bucket_name, session, num_workers, session_kwargs, tabular_endpoint_urls):
+    log.info("starting %d workers, endpoints=%s", num_workers, tabular_endpoint_urls)
+    with ProcessPoolExecutor(max_workers=num_workers) as executor:
+        for i, url in zip(range(num_workers), itertools.cycle(tabular_endpoint_urls)):
+            executor.submit(run_query, session_kwargs, i, test_bucket_name, url)
+    log.info("finished %d workers", num_workers)
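test_ingest spreads the row-group ranges across the configured endpoints round-robin, via zip with itertools.cycle. A small self-contained illustration (the URLs are made up):

import itertools

ranges = [(0, 100_000), (100_000, 200_000), (200_000, 300_000)]
urls = ["http://vip1:9090", "http://vip2:9090"]  # illustrative endpoints

# zip() stops at the shorter iterable, so cycle() assigns endpoints round-robin:
assert list(zip(ranges, itertools.cycle(urls))) == [
    ((0, 100_000), "http://vip1:9090"),
    ((100_000, 200_000), "http://vip2:9090"),
    ((200_000, 300_000), "http://vip1:9090"),
]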
vastdb/conftest.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+import sqlite3
 from pathlib import Path

 import boto3
@@ -13,27 +14,43 @@ def pytest_addoption(parser):
                      default=os.environ.get("AWS_ACCESS_KEY_ID", None))
     parser.addoption("--tabular-secret-key", help="Secret key with Tabular permissions (AWS_SECRET_ACCESS_KEY)",
                      default=os.environ.get("AWS_SECRET_ACCESS_KEY", None))
-    parser.addoption("--tabular-endpoint-url", help="Tabular server endpoint", default="http://localhost:9090")
+    parser.addoption("--tabular-endpoint-url", help="Tabular server endpoint", default=[], action="append")
     parser.addoption("--data-path", help="Data files location", default=None)
     parser.addoption("--crater-path", help="Save benchmark results in a dedicated location", default=None)
     parser.addoption("--schema-name", help="Name of schema for the test to operate on", default=None)
     parser.addoption("--table-name", help="Name of table for the test to operate on", default=None)
+    parser.addoption("--num-workers", help="Number of concurrent workers", default=1)


 @pytest.fixture(scope="session")
-def session_kwargs(request):
-    return dict(
+def session_kwargs(request, tabular_endpoint_urls):
+    return dict(
         access=request.config.getoption("--tabular-access-key"),
         secret=request.config.getoption("--tabular-secret-key"),
-        endpoint=request.config.getoption("--tabular-endpoint-url"),
+        endpoint=tabular_endpoint_urls[0],
     )


+@pytest.fixture(scope="session")
+def session(session_kwargs):
+    return vastdb.connect(**session_kwargs)
+
+
+@pytest.fixture(scope="session")
+def num_workers(request):
+    return int(request.config.getoption("--num-workers"))
+
+
 @pytest.fixture(scope="session")
 def test_bucket_name(request):
     return request.config.getoption("--tabular-bucket-name")


+@pytest.fixture(scope="session")
+def tabular_endpoint_urls(request):
+    return request.config.getoption("--tabular-endpoint-url") or ["http://localhost:9090"]
+
+
 def iter_schemas(s):
     """Recusively scan all schemas."""
     children = s.schemas()
@@ -55,12 +72,12 @@ def clean_bucket_name(request, test_bucket_name, session):


 @pytest.fixture(scope="session")
-def s3(request):
+def s3(request, tabular_endpoint_urls):
     return boto3.client(
         's3',
         aws_access_key_id=request.config.getoption("--tabular-access-key"),
         aws_secret_access_key=request.config.getoption("--tabular-secret-key"),
-        endpoint_url=request.config.getoption("--tabular-endpoint-url"))
+        endpoint_url=tabular_endpoint_urls[0])


 @pytest.fixture(scope="function")
@@ -81,3 +98,8 @@ def schema_name(request):
 @pytest.fixture(scope="function")
 def table_name(request):
     return request.config.getoption("--table-name")
+
+
+@pytest.fixture(scope="function")
+def perf_metrics_db(crater_path):
+    return sqlite3.connect(f"{crater_path}/metrics.sqlite")
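With action="append", --tabular-endpoint-url may now be passed multiple times, and the tabular_endpoint_urls fixture falls back to a single local endpoint when the option is absent. The semantics mirror plain argparse (a sketch; the option values are illustrative):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--tabular-endpoint-url", default=[], action="append")

# Repeating the flag accumulates values into a list:
args = parser.parse_args(["--tabular-endpoint-url", "http://vip1:9090",
                          "--tabular-endpoint-url", "http://vip2:9090"])
assert args.tabular_endpoint_url == ["http://vip1:9090", "http://vip2:9090"]

# With no flag the default [] is falsy, so the fixture's
# `urls or ["http://localhost:9090"]` fallback applies:
args = parser.parse_args([])
assert (args.tabular_endpoint_url or ["http://localhost:9090"]) == ["http://localhost:9090"]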
vastdb/errors.py
CHANGED
@@ -3,6 +3,7 @@ import xml.etree.ElementTree
 from dataclasses import dataclass
 from enum import Enum

+import pyarrow as pa
 import requests


@@ -169,6 +170,11 @@ class NotSupportedCommand(NotSupported):
     table: str


+@dataclass
+class NotSupportedType(NotSupported):
+    field: pa.Field
+
+
 @dataclass
 class NotSupportedVersion(NotSupported):
     err_msg: str
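Since NotSupportedType subclasses NotSupported, existing broad handlers keep catching the new error; a quick check:

from vastdb.errors import NotSupported, NotSupportedType

assert issubclass(NotSupportedType, NotSupported)  # broad `except NotSupported` still works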
vastdb/schema.py
CHANGED
@@ -10,7 +10,7 @@ from typing import TYPE_CHECKING, Iterable, List, Optional

 import pyarrow as pa

-from . import bucket, errors, schema, table
+from . import bucket, errors, schema, table, util

 if TYPE_CHECKING:
     from .table import Table
@@ -86,6 +86,7 @@ class Schema:
         if use_external_row_ids_allocation:
             self.tx._rpc.features.check_external_row_ids_allocation()

+        util.check_supported_types(columns)
         self.tx._rpc.api.create_table(self.bucket.name, self.name, table_name, columns, txid=self.tx.txid,
                                       use_external_row_ids_allocation=use_external_row_ids_allocation)
         log.info("Created table: %s", table_name)
vastdb/table.py
CHANGED
@@ -167,8 +167,13 @@ class Table:
         """Also, load columns' metadata."""
         self.arrow_schema = self.columns()

-
-        self._ibis_table = ibis.table(ibis.Schema.from_pyarrow(self.arrow_schema),
+        self._table_path = f'{self.schema.bucket.name}/{self.schema.name}/{self.name}'
+        self._ibis_table = ibis.table(ibis.Schema.from_pyarrow(self.arrow_schema), self._table_path)
+
+    @property
+    def path(self):
+        """Return table's path."""
+        return self._table_path

     @property
     def tx(self):
@@ -486,6 +491,7 @@ class Table:
             raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
         try:
             row_ids = []
+            util.check_supported_types(rows.schema)
             serialized_slices = util.iter_serialized_slices(rows, MAX_INSERT_ROWS_PER_PATCH)
             for slice in serialized_slices:
                 res = self.tx._rpc.api.insert_rows(self.bucket.name, self.schema.name, self.name, record_batch=slice,
@@ -528,6 +534,7 @@ class Table:

         update_rows_rb = util.sort_record_batch_if_needed(update_rows_rb, INTERNAL_ROW_ID)

+        util.check_supported_types(update_rows_rb.schema)
         serialized_slices = util.iter_serialized_slices(update_rows_rb, MAX_ROWS_PER_BATCH)
         for slice in serialized_slices:
             self.tx._rpc.api.update_rows(self.bucket.name, self.schema.name, self.name, record_batch=slice,
@@ -572,6 +579,7 @@ class Table:
         """Add a new column."""
         if self._imports_table:
             raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
+        util.check_supported_types(new_column)
         self.tx._rpc.api.add_columns(self.bucket.name, self.schema.name, self.name, new_column, txid=self.tx.txid)
         log.info("Added column(s): %s", new_column)
         self.arrow_schema = self.columns()
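The new path property exposes the "bucket/schema/table" string that was previously only embedded in the ibis table name; the benchmark's metrics rows use it as a stable table identifier. A usage sketch (requires a live endpoint; bucket, schema, table names and credentials are illustrative):

import vastdb

session = vastdb.connect(endpoint="http://localhost:9090", access="...", secret="...")
with session.transaction() as tx:
    t = tx.bucket("my-bucket").schema("perf").table("sample")
    # path is "<bucket>/<schema>/<table>":
    assert t.path == "my-bucket/perf/sample"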
vastdb/tests/metrics.py
ADDED
@@ -0,0 +1,42 @@
+import dataclasses
+import sqlite3
+from typing import List
+
+_MAP_SQLITE_TYPES = {
+    str: "TEXT",
+    float: "REAL",
+    int: "INTEGER",
+}
+
+
+@dataclasses.dataclass
+class Row:
+    start: float
+    finish: float
+    table_path: str
+    op: str
+    nbytes: int
+    rows: int
+    cols: int
+    pid: int
+    tid: int
+
+
+class Table:
+    def __init__(self, conn: sqlite3.Connection, name: str):
+        self.fields = dataclasses.fields(Row)
+        self.conn = conn
+        self.name = name
+        columns = ", ".join(
+            f"{f.name} {_MAP_SQLITE_TYPES[f.type]}"
+            for f in self.fields
+        )
+        cmd = f"CREATE TABLE {self.name} ({columns})"
+        self.conn.execute(cmd).fetchall()
+
+    def insert(self, rows: List[Row]):
+        args = ", ".join(["?"] * len(self.fields))
+        cmd = f"INSERT INTO {self.name} VALUES ({args})"
+        data = [dataclasses.astuple(row) for row in rows]
+        self.conn.executemany(cmd, data).fetchall()
+        self.conn.commit()
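The sink can be exercised standalone against an in-memory database; a short demo with made-up values:

import sqlite3
from vastdb.tests import metrics

conn = sqlite3.connect(":memory:")
table = metrics.Table(conn, "ingest")  # creates the table from Row's fields
table.insert([metrics.Row(start=0.0, finish=1.5, table_path="b/perf/sample",
                          op="insert", nbytes=4_000_000, rows=100_000,
                          cols=10, pid=1234, tid=5678)])
print(conn.execute("SELECT op, nbytes, rows FROM ingest").fetchall())
# [('insert', 4000000, 100000)]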
vastdb/tests/test_tables.py
CHANGED
@@ -311,6 +311,46 @@ def test_types(session, clean_bucket_name):
     assert select(t['ts9'] == ts_literal) == expected.filter(pc.field('ts9') == ts_literal)


+TIMESTAMP_UNITS = ['s', 'ms', 'us', 'ns']
+
+
+def test_unsupported_timezone(session, clean_bucket_name):
+    with session.transaction() as tx:
+        s = tx.bucket(clean_bucket_name).create_schema('s1')
+        for unit in TIMESTAMP_UNITS:
+            col_type = pa.timestamp(unit, 'UTC')
+            with pytest.raises(errors.NotSupportedType):
+                s.create_table('t1', pa.schema([('ts', col_type)]))
+        assert s.tables() == []
+
+        cols = [('c', pa.int64())]
+        t1 = s.create_table('t1', pa.schema(cols))
+        for unit in TIMESTAMP_UNITS:
+            col_type = pa.timestamp(unit, 'UTC')
+            with pytest.raises(errors.NotSupportedType):
+                t1.add_column(pa.schema([('ts', col_type)]))
+
+        cols = [(f'c_{unit}', pa.timestamp(unit)) for unit in TIMESTAMP_UNITS]
+        t2 = s.create_table('t2', pa.schema(cols))
+
+        for unit in TIMESTAMP_UNITS:
+            col_type = pa.timestamp(unit, 'UTC')
+
+            rb = pa.record_batch(
+                data=[[None]],
+                schema=pa.schema([(f'c_{unit}', col_type)]))
+            with pytest.raises(errors.NotSupportedType):
+                t2.insert(rb)
+
+            rb = pa.record_batch(
+                data=[[0], [None]],
+                schema=pa.schema([
+                    (INTERNAL_ROW_ID, pa.uint64()),
+                    (f'c_{unit}', col_type)]))
+            with pytest.raises(errors.NotSupportedType):
+                t2.update(rb)
+
+
 def test_filters(session, clean_bucket_name):
     columns = pa.schema([
         ('a', pa.int32()),
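Since tz-aware timestamps are now rejected at create, add-column, insert, and update, a client holding such data would need to strip the timezone first. One possible client-side workaround, assuming naive UTC storage is acceptable (as in the test's t2 columns):

import pyarrow as pa

arr = pa.array([1_700_000_000_000_000], pa.timestamp("us", tz="UTC"))
naive = arr.cast(pa.timestamp("us"))  # drops the tz metadata, keeps the UTC instants
assert naive.type.tz is None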
vastdb/util.py
CHANGED
@@ -1,3 +1,4 @@
+import importlib
 import logging
 import re
 from typing import TYPE_CHECKING, Callable, List, Optional, Union
@@ -6,7 +7,7 @@ import pyarrow as pa
 import pyarrow.compute as pc
 import pyarrow.parquet as pq

-from .errors import InvalidArgument, TooWideRow
+from .errors import InvalidArgument, NotSupportedType, TooWideRow

 log = logging.getLogger(__name__)

@@ -152,3 +153,14 @@ def sort_record_batch_if_needed(record_batch, sort_column):
         return record_batch.sort_by(sort_column)
     else:
         return record_batch
+
+
+def check_supported_types(fields: pa.Schema):
+    for f in fields:
+        if isinstance(f.type, pa.TimestampType):
+            if f.type.tz:
+                raise NotSupportedType(f)
+
+
+def version():
+    return importlib.metadata.distribution('vastdb').version
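Both helpers are usable standalone; check_supported_types is what create_table, insert, update, and add_column now call before issuing the RPC. A short sketch:

import pyarrow as pa
from vastdb import util
from vastdb.errors import NotSupportedType

print(util.version())  # e.g. "0.1.10"

util.check_supported_types(pa.schema([("ts", pa.timestamp("us"))]))  # naive: accepted

try:
    util.check_supported_types(pa.schema([("ts", pa.timestamp("us", tz="UTC"))]))
except NotSupportedType as e:
    print("rejected tz-aware column:", e)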
vastdb/vast_tests/test_ha.py
CHANGED
@@ -10,7 +10,7 @@ logger = logging.getLogger(__name__)


 @pytest.mark.ha
-def test_ha_query(session,
+def test_ha_query(session, test_bucket_name, schema_name, table_name):
     # runs in parallel to ha scenario
     times_to_query, records_in_table = 50, 100_000_000
     arrow_array = pa.array(range(0, records_in_table), type=pa.int64())
{vastdb-0.1.8.dist-info → vastdb-0.1.10.dist-info}/RECORD
CHANGED
@@ -149,31 +149,33 @@ vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI
 vast_flatbuf/tabular/VipRange.py,sha256=_BJd1RRZAcK76T9vlsHzXKYVsPVaz6WTEAqStMQCAUQ,2069
 vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vastdb/__init__.py,sha256=8PLcZowy_vM0zuiYSQPXuxIEMcwHD7IRFpgcPK-03bk,386
-vastdb/_internal.py,sha256=
+vastdb/_internal.py,sha256=4vi6KgkfHnDOSZUrU3oQcNKKdeMH3alODLGk7Yt59Gk,90001
 vastdb/bucket.py,sha256=5KuKhPjZOevznZqWHDVVocejvAy7dcwobPuV6BJCfPc,2544
-vastdb/conftest.py,sha256=
-vastdb/errors.py,sha256=
-vastdb/schema.py,sha256=
+vastdb/conftest.py,sha256=ePzQiEQmlNGcM2T4GZevE4XuvcnFWfnTSzr8IVZpVKk,3438
+vastdb/errors.py,sha256=jER5RQYsBRlQsjym1ItQYRukggMypATOo_sKvsJtMbo,4278
+vastdb/schema.py,sha256=yaueil92MSMYJf6bWseov_8fXTdW5zaKLXNjP5uuyzI,5963
 vastdb/session.py,sha256=3YHhG7IamFOKuy-Fkq_IVtPNriSfI6IN_4z4arBFbDU,3349
-vastdb/table.py,sha256=
+vastdb/table.py,sha256=C0kgV8CJVgoRxVx83SPTn75mgbTz9OWgYwK_RzLPZ5Q,32994
 vastdb/transaction.py,sha256=qu2rOlR7AS1ojMOzgWapQMpcorrutelZZLH1mLmTHxk,3186
-vastdb/util.py,sha256=
+vastdb/util.py,sha256=KQ2CjML-ipWxrJjwiaBbj4bxpTCtL24Pr2Co9woyw3Y,5983
 vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vastdb/bench/test_perf.py,sha256=yn5gE7t_nzmJHBl9bCs1hxQOgzhvFphuYElsWGko8ts,1084
+vastdb/bench/test_sample.py,sha256=0qsKPj3i88J-YTrOrGvsP19xsyWGZy_-ptIt3oXBbSw,7181
 vastdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vastdb/tests/metrics.py,sha256=f1oOPKDsu-BzBLin0IQvjG-ueRDHTY-Hzl357TuoxCQ,989
 vastdb/tests/test_duckdb.py,sha256=STw_1PwTQR8Naz6s0p6lQTV1ZTKKhe3LPBUbhqzTCu0,1880
 vastdb/tests/test_imports.py,sha256=xKub3-bisFjH0BsZM8COfiUWuMrtoOoQKprF6VQT9RI,5669
 vastdb/tests/test_nested.py,sha256=22NAxBTm7Aq-Vn6AIYbi5Cb1ET8W0XeLK3pp4D8BYWI,3448
 vastdb/tests/test_projections.py,sha256=3y1kubwVrzO-xoR0hyps7zrjOJI8niCYspaFTN16Q9w,4540
 vastdb/tests/test_sanity.py,sha256=V6dO5Y44B6pG8Eet6atTTGGH1yPz75_k0ZybHY-IiF8,3039
 vastdb/tests/test_schemas.py,sha256=l70YQMlx2UL1KRQhApriiG2ZM7GJF-IzWU31H3Yqn1U,3312
-vastdb/tests/test_tables.py,sha256=
+vastdb/tests/test_tables.py,sha256=RlwVfzs2hjfs2gchiRY0hnWoOAu4MV_9NbQCeHR6_us,31590
 vastdb/tests/test_util.py,sha256=Ok_sAEBJsRGF5Voa_v5eu3eAd52GWu8jMjjQbadwW-s,1260
 vastdb/tests/util.py,sha256=dpRJYbboDnlqL4qIdvScpp8--5fxRUBIcIYitrfcj9o,555
 vastdb/vast_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vastdb/vast_tests/test_ha.py,sha256=
-vastdb-0.1.
-vastdb-0.1.
-vastdb-0.1.
-vastdb-0.1.
-vastdb-0.1.
+vastdb/vast_tests/test_ha.py,sha256=744P4G6VJ09RIkHhMQL4wlipCBJWQVMhyvUrSc4k1HQ,975
+vastdb-0.1.10.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
+vastdb-0.1.10.dist-info/METADATA,sha256=Pjw1EZvwnKhfEjuRVVMR0DBOSkmVo5wcHftqddxqRNY,1351
+vastdb-0.1.10.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
+vastdb-0.1.10.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
+vastdb-0.1.10.dist-info/RECORD,,
{vastdb-0.1.8.dist-info → vastdb-0.1.10.dist-info}/LICENSE
File without changes

{vastdb-0.1.8.dist-info → vastdb-0.1.10.dist-info}/top_level.txt
File without changes