vastdb 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/_internal.py +41 -10
- vastdb/bench/perf_bench/__init__.py +0 -0
- vastdb/bench/perf_bench/bench_repo/__init__.py +0 -0
- vastdb/bench/perf_bench/bench_repo/mega_combo.py +87 -0
- vastdb/bench/perf_bench/cli.py +225 -0
- vastdb/bench/perf_bench/common/__init__.py +0 -0
- vastdb/bench/perf_bench/common/constants.py +96 -0
- vastdb/bench/perf_bench/common/log_utils.py +67 -0
- vastdb/bench/perf_bench/common/types.py +34 -0
- vastdb/bench/perf_bench/common/utils.py +219 -0
- vastdb/bench/perf_bench/dataset/__init__.py +0 -0
- vastdb/bench/perf_bench/dataset/generate_secmaster.py +105 -0
- vastdb/bench/perf_bench/dataset/generate_stocks_dataset.py +242 -0
- vastdb/bench/perf_bench/dataset/schemas.py +101 -0
- vastdb/bench/perf_bench/dataset/secmaster.py +33 -0
- vastdb/bench/perf_bench/orchestrate/__init__.py +0 -0
- vastdb/bench/perf_bench/orchestrate/bench_spec.py +91 -0
- vastdb/bench/perf_bench/orchestrate/results_helpers.py +126 -0
- vastdb/bench/perf_bench/orchestrate/scenario.py +109 -0
- vastdb/bench/perf_bench/orchestrate/scenario_generator.py +144 -0
- vastdb/bench/perf_bench/query/__init__.py +0 -0
- vastdb/bench/perf_bench/query/arrow_common.py +59 -0
- vastdb/bench/perf_bench/query/query.py +42 -0
- vastdb/bench/perf_bench/query/query_pyarrow.py +70 -0
- vastdb/bench/perf_bench/query/query_vastdb.py +78 -0
- vastdb/bench/perf_bench/run.py +79 -0
- vastdb/bench/test_sample.py +4 -2
- vastdb/conftest.py +1 -1
- vastdb/session.py +0 -6
- vastdb/table.py +35 -35
- vastdb/tests/test_nested.py +58 -0
- vastdb/tests/test_tables.py +13 -0
- vastdb/transaction.py +4 -8
- vastdb/util.py +5 -0
- {vastdb-1.0.0.dist-info → vastdb-1.1.1.dist-info}/METADATA +3 -4
- {vastdb-1.0.0.dist-info → vastdb-1.1.1.dist-info}/RECORD +39 -14
- {vastdb-1.0.0.dist-info → vastdb-1.1.1.dist-info}/WHEEL +1 -1
- {vastdb-1.0.0.dist-info → vastdb-1.1.1.dist-info}/LICENSE +0 -0
- {vastdb-1.0.0.dist-info → vastdb-1.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# ruff: noqa: F841, PLW0603
|
|
2
|
+
|
|
3
|
+
import datetime as dt
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Any, Dict, Optional, Sequence
|
|
6
|
+
|
|
7
|
+
import pyarrow as pa
|
|
8
|
+
|
|
9
|
+
import vastdb.bench.perf_bench.common.log_utils
|
|
10
|
+
from vastdb.bench.perf_bench.common import utils as bu
|
|
11
|
+
from vastdb.bench.perf_bench.common.constants import VastConnDetails
|
|
12
|
+
from vastdb.bench.perf_bench.dataset import secmaster as sm
|
|
13
|
+
from vastdb.bench.perf_bench.dataset.schemas import BF
|
|
14
|
+
from vastdb.table import QueryConfig
|
|
15
|
+
|
|
16
|
+
LOG = vastdb.bench.perf_bench.common.log_utils.get_logger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# noinspection PyUnusedLocal,PyArgumentList
|
|
20
|
+
def query_vastdb(
    universe: Sequence[str],
    columns: Optional[Sequence[str]] = None,
    from_t: Optional[dt.datetime] = None,
    to_t: Optional[dt.datetime] = None,
    path: Optional[str] = None,
    use_sid: bool = True,
    logger: Optional[logging.Logger] = None,
    backend_kwargs: Optional[Dict[str, Any]] = None,
) -> pa.Table:
    """Query the benchmark table through VastDB and return the rows as a table.

    Args:
        universe: Symbols to select. With ``use_sid`` they are mapped through
            ``sm.to_sid`` and matched against the ``BF.sid`` column; otherwise
            they are matched as-is against the ``BF.ticker`` column.
        columns: Columns to project; passed straight to ``Table.select``.
        from_t: Inclusive lower bound on the ``ts`` column, if given.
        to_t: Exclusive upper bound on the ``ts`` column, if given.
        path: Not used by this backend (accepted for signature parity).
        use_sid: Select by security id instead of by ticker.
        logger: Not used by this backend (accepted for signature parity).
        backend_kwargs: Extra options. ``conn_details`` (a ``VastConnDetails``)
            selects the connection, ``arrow_kwargs`` is ignored, ``filter`` is
            rejected, and whatever remains is forwarded to ``QueryConfig``.

    Returns:
        A ``pyarrow.Table`` assembled from the record batches of the select.

    Raises:
        ValueError: If ``backend_kwargs`` carries a truthy ``filter`` entry.
    """
    # ------------------------------------------------------------
    # Query via VastDB
    # ------------------------------------------------------------
    # Work on a shallow copy: the pops below would otherwise strip
    # "conn_details" & co. from the caller's dict, so a second call that
    # reuses the same backend_kwargs would silently fall back to defaults.
    kwargs = dict(backend_kwargs or {})
    conn_details: Optional[VastConnDetails] = kwargs.pop("conn_details", None)
    conn_details = conn_details or VastConnDetails()
    ss = bu.get_vastdb_session(
        access=conn_details.access,
        secret=conn_details.secret,
        vastdb_endpoint=conn_details.vastdb_endpoint,
        ssl_verify=conn_details.vastdb_ssl_verify,
    )
    with ss.transaction() as tx:
        b = tx.bucket(conn_details.vastdb_bucket)
        s = b.schema(conn_details.vastdb_schema)
        t = s.table(conn_details.vastdb_table)

        if use_sid:
            fld = BF.sid.value
            # `sym` (not `s`) so the schema handle above is not shadowed.
            sid_uni = sorted(sm.to_sid(sym) for sym in universe)
            filters = t[fld].isin(sid_uni)
        else:
            fld = BF.ticker.value
            filters = t[fld].isin(sorted(universe))

        # Each bound is optional and independent. Predicates are combined
        # with `&`; the previous `filters.append(...)` on the `to_t`-only
        # path treated the predicate expression as a list.
        if from_t:
            # noinspection PyTypedDict
            filters = filters & (t["ts"] >= from_t)
        if to_t:
            # noinspection PyTypedDict
            filters = filters & (t["ts"] < to_t)

        # Cleanup the kwargs to be passed to the arrow scanner
        kwargs.pop("arrow_kwargs", None)
        if kwargs.pop("filter", None):
            raise ValueError("Can't use filter with VastDB query")

        # Perform the query
        # noinspection PyTypeChecker
        config = QueryConfig(**kwargs)
        endpoint = conn_details.vastdb_endpoint
        # Fixed fan-out for the benchmark; overrides any caller-supplied
        # num_splits / num_sub_splits / data_endpoints.
        config.num_splits = 32
        config.num_sub_splits = 8
        config.data_endpoints = [endpoint] * config.num_splits
        table = pa.Table.from_batches(
            batches=t.select(predicate=filters, columns=columns, config=config)
        )
        return table
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# ruff: noqa: PLW2901, C901
|
|
2
|
+
|
|
3
|
+
import datetime as dt
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import List, Optional, Sequence
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
from vastdb.bench.perf_bench.common.constants import (
|
|
11
|
+
DEFAULT_RESULTS_DIR,
|
|
12
|
+
)
|
|
13
|
+
from vastdb.bench.perf_bench.common.log_utils import get_logger
|
|
14
|
+
from vastdb.bench.perf_bench.common.types import PathLikeT
|
|
15
|
+
from vastdb.bench.perf_bench.orchestrate import results_helpers as bc
|
|
16
|
+
from vastdb.bench.perf_bench.orchestrate.bench_spec import BenchResult
|
|
17
|
+
from vastdb.bench.perf_bench.orchestrate.scenario import BenchScenario
|
|
18
|
+
|
|
19
|
+
LOG = get_logger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class RunnableBenchSession:
    """Immutable bundle of the settings describing one benchmark session.

    The last four fields carry the same names as the parameters of
    ``run_scenarios`` in this module.
    """

    # Name identifying the benchmark.
    bench_name: str
    # Scenarios that make up the session.
    scenarios: Sequence[BenchScenario]
    # Number of runs per benchmark scenario.
    runs_per_bench: int
    # Degree of parallelism to run with.
    parallelism: int
    # Base directory for result files; may be None.
    # NOTE(review): presumably None means "use DEFAULT_RESULTS_DIR", as in
    # run_scenarios - confirm against the code that consumes this class.
    results_base_dir: Optional[PathLikeT]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def run_scenarios(
    scenarios: Sequence[BenchScenario],
    runs_per_bench: int = 3,
    parallelism: int = 1,
    results_base_dir: Optional[str] = None,
):
    """Run every scenario in order, persisting and verifying the results.

    Args:
        scenarios: Scenarios to execute, in the given order.
        runs_per_bench: Runs per scenario; coerced with ``int`` and must be >= 1.
        parallelism: Parallelism handed to each scenario; coerced with ``int``
            and must be >= 1.
        results_base_dir: Directory for the results CSV; falls back to
            ``DEFAULT_RESULTS_DIR`` when falsy.

    Raises:
        ValueError: If ``runs_per_bench`` or ``parallelism`` is below 1.
        AssertionError: If the CSV read back differs from the in-memory results.
    """
    # The file name embeds the start time and the parallelism as passed in.
    base_dir = Path(results_base_dir or DEFAULT_RESULTS_DIR)
    stamp = dt.datetime.now().strftime('%Y%m%d_%H%M%S')
    results_path = base_dir / f"results_{stamp}_par{parallelism}.csv"

    runs_per_bench = int(runs_per_bench)
    if runs_per_bench < 1:
        raise ValueError(f"runs_per_bench must be >= 1: {runs_per_bench=}")
    parallelism = int(parallelism)
    if parallelism < 1:
        raise ValueError(f"parallelism must be >= 1: {parallelism=}")

    total = len(scenarios)
    LOG.info("Will run '%d' scenarios", total)

    # noinspection PyShadowingNames
    def worker_init():
        # Import the heavy libraries up front in each worker so that the
        # import cost is not paid inside a measured run.
        import numpy as np  # noqa: F401
        import pandas as pd  # noqa: F401
        import pyarrow as pa  # noqa: F401

        from vastdb.bench.perf_bench.common.utils import get_logger

        get_logger(__name__).info("Warmed up.")

    collected: List[BenchResult] = []
    for position, scen in enumerate(scenarios, start=1):
        LOG.info(f"Running scenario: {scen.key} [{position} of {total}]")
        scen.run(
            n_runs=runs_per_bench,
            discard_first_run=True,
            parallelism=parallelism,
            workers_init=worker_init,
        )
        collected.extend(scen.results or ())
        # Checkpoint after every scenario so partial results survive a crash.
        bc.save_results(collected, results_path=results_path)

    # Save the results from all workers and print stats
    bc.save_results(collected, results_path=results_path)
    stats_df = bc.calculate_aggregate_stats(results=collected)
    LOG.info(f"\n{stats_df.to_string()}")

    # Verify the results written
    in_memory_df = bc.results_to_df(collected)
    on_disk_df = pd.read_csv(results_path).astype(in_memory_df.dtypes)
    pd.testing.assert_frame_equal(in_memory_df, on_disk_df)
|
vastdb/bench/test_sample.py
CHANGED
|
@@ -109,7 +109,7 @@ def load_batch(bucket, session_kwargs, offset, limit):
|
|
|
109
109
|
|
|
110
110
|
def test_ingest(test_bucket_name, session_kwargs, tabular_endpoint_urls, num_workers, perf_metrics_db):
|
|
111
111
|
session = vastdb.connect(**session_kwargs)
|
|
112
|
-
metrics_table = metrics.Table(perf_metrics_db, "
|
|
112
|
+
metrics_table = metrics.Table(perf_metrics_db, "test_ingest")
|
|
113
113
|
|
|
114
114
|
with session.transaction() as tx:
|
|
115
115
|
b = tx.bucket(test_bucket_name)
|
|
@@ -201,9 +201,11 @@ def run_query(session_kwargs, i, bucket_name, endpoint_url):
|
|
|
201
201
|
nbytes=data, rows=rows, cols=len(cols),
|
|
202
202
|
pid=pid, tid=tid, sdk_version=sdk_version))
|
|
203
203
|
|
|
204
|
+
return metrics_rows
|
|
205
|
+
|
|
204
206
|
|
|
205
207
|
def test_scan(test_bucket_name, session, num_workers, session_kwargs, tabular_endpoint_urls, perf_metrics_db):
|
|
206
|
-
metrics_table = metrics.Table(perf_metrics_db, "
|
|
208
|
+
metrics_table = metrics.Table(perf_metrics_db, "test_scan")
|
|
207
209
|
|
|
208
210
|
log.info("starting %d workers, endpoints=%s", num_workers, tabular_endpoint_urls)
|
|
209
211
|
with ProcessPoolExecutor(max_workers=num_workers) as executor:
|
vastdb/conftest.py
CHANGED
vastdb/session.py
CHANGED
|
@@ -23,8 +23,6 @@ class Session:
|
|
|
23
23
|
timeout=None,
|
|
24
24
|
backoff_config: Optional["BackoffConfig"] = None):
|
|
25
25
|
"""Connect to a VAST Database endpoint, using specified credentials."""
|
|
26
|
-
import boto3
|
|
27
|
-
|
|
28
26
|
from . import _internal, features
|
|
29
27
|
|
|
30
28
|
if access is None:
|
|
@@ -42,10 +40,6 @@ class Session:
|
|
|
42
40
|
timeout=timeout,
|
|
43
41
|
backoff_config=backoff_config)
|
|
44
42
|
self.features = features.Features(self.api.vast_version)
|
|
45
|
-
self.s3 = boto3.client('s3',
|
|
46
|
-
aws_access_key_id=access,
|
|
47
|
-
aws_secret_access_key=secret,
|
|
48
|
-
endpoint_url=endpoint)
|
|
49
43
|
|
|
50
44
|
def __repr__(self):
|
|
51
45
|
"""Don't show the secret key."""
|
vastdb/table.py
CHANGED
|
@@ -227,22 +227,23 @@ class Table:
|
|
|
227
227
|
stop_event = Event()
|
|
228
228
|
num_files_in_batch = min(ceil(len(source_files) / len(endpoints)), max_batch_size)
|
|
229
229
|
|
|
230
|
-
def import_worker(q,
|
|
230
|
+
def import_worker(q, endpoint):
|
|
231
231
|
try:
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
232
|
+
with self.tx._rpc.api.with_endpoint(endpoint) as session:
|
|
233
|
+
while not q.empty():
|
|
234
|
+
if stop_event.is_set():
|
|
235
|
+
log.debug("stop_event is set, exiting")
|
|
236
|
+
break
|
|
237
|
+
files_batch = {}
|
|
238
|
+
try:
|
|
239
|
+
for _ in range(num_files_in_batch):
|
|
240
|
+
files_batch.update({q.get(block=False)})
|
|
241
|
+
except queue.Empty:
|
|
242
|
+
pass
|
|
243
|
+
if files_batch:
|
|
244
|
+
log.debug("Starting import batch of %s files", len(files_batch))
|
|
245
|
+
session.import_data(
|
|
246
|
+
self.bucket.name, self.schema.name, self.name, files_batch, txid=self.tx.txid)
|
|
246
247
|
except (Exception, KeyboardInterrupt) as e:
|
|
247
248
|
stop_event.set()
|
|
248
249
|
log.error("Got exception inside import_worker. exception: %s", e)
|
|
@@ -253,8 +254,7 @@ class Table:
|
|
|
253
254
|
max_workers=config.import_concurrency, thread_name_prefix='import_thread') as pool:
|
|
254
255
|
try:
|
|
255
256
|
for endpoint in endpoints:
|
|
256
|
-
|
|
257
|
-
futures.append(pool.submit(import_worker, files_queue, session))
|
|
257
|
+
futures.append(pool.submit(import_worker, files_queue, endpoint))
|
|
258
258
|
|
|
259
259
|
log.debug("Waiting for import workers to finish")
|
|
260
260
|
for future in concurrent.futures.as_completed(futures):
|
|
@@ -351,23 +351,23 @@ class Table:
|
|
|
351
351
|
|
|
352
352
|
def single_endpoint_worker(endpoint: str):
|
|
353
353
|
try:
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
354
|
+
with self.tx._rpc.api.with_endpoint(endpoint) as host_api:
|
|
355
|
+
backoff_decorator = self.tx._rpc.api._backoff_decorator
|
|
356
|
+
while True:
|
|
357
|
+
check_stop()
|
|
358
|
+
try:
|
|
359
|
+
split = splits_queue.get_nowait()
|
|
360
|
+
except queue.Empty:
|
|
361
|
+
log.debug("splits queue is empty")
|
|
362
|
+
break
|
|
363
|
+
|
|
364
|
+
split_state = SelectSplitState(query_data_request=query_data_request,
|
|
365
|
+
table=self,
|
|
366
|
+
split_id=split,
|
|
367
|
+
config=config)
|
|
368
|
+
|
|
369
|
+
process_with_retries = backoff_decorator(split_state.process_split)
|
|
370
|
+
process_with_retries(host_api, record_batches_queue, check_stop)
|
|
371
371
|
|
|
372
372
|
except StoppedException:
|
|
373
373
|
log.debug("stop signal.", exc_info=True)
|
|
@@ -439,7 +439,7 @@ class Table:
|
|
|
439
439
|
self.update(rows=column_record_batch, columns=columns_name_chunk)
|
|
440
440
|
return row_ids
|
|
441
441
|
|
|
442
|
-
def insert(self, rows: pa.RecordBatch):
|
|
442
|
+
def insert(self, rows: Union[pa.RecordBatch, pa.Table]):
|
|
443
443
|
"""Insert a RecordBatch into this table."""
|
|
444
444
|
if self._imports_table:
|
|
445
445
|
raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
|
vastdb/tests/test_nested.py
CHANGED
|
@@ -100,3 +100,61 @@ def test_nested_unsupported_filter(session, clean_bucket_name):
|
|
|
100
100
|
|
|
101
101
|
with pytest.raises(NotImplementedError):
|
|
102
102
|
list(t.select(predicate=(t['s'].isnull())))
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_nested_subfields_predicate_pushdown(session, clean_bucket_name):
    """Check that predicates on struct sub-fields select the expected rows."""
    inner_struct = pa.struct([
        ('q1', pa.utf8()),
        ('q2', pa.float32())
    ])
    outer_struct = pa.struct([
        ('x', pa.int16()),
        ('y', pa.int32()),
        ('q', inner_struct)
    ])
    columns = pa.schema([
        ('x', pa.int64()),
        ('l', pa.list_(pa.int8())),
        ('y', pa.int64()),
        ('m', pa.map_(pa.utf8(), pa.float64())),
        ('z', pa.int64()),
        ('s', outer_struct),
        ('w', pa.int64()),
    ])
    struct_rows = [
        {'x': 1, 'y': None, 'q': {'q1': 'AAA', 'q2': 1.0}},
        None,
        {'x': 2, 'y': 3, 'q': {'q1': 'B', 'q2': 2.0}},
        {'x': None, 'y': 4, 'q': {'q1': 'CC', 'q2': 2.0}},
    ]
    expected = pa.table(schema=columns, data=[
        [1, 2, 3, None],
        [[1], [], [2, 3], None],
        [1, 2, None, 3],
        [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
        [1, None, 2, 3],
        struct_rows,
        [None, 1, 2, 3],
    ])

    with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
        sx = t['s']['x']
        sy = t['s']['y']
        q1 = t['s']['q']['q1']
        q2 = t['s']['q']['q2']

        # (predicate, indices of the rows of `expected` it must return)
        cases = [
            (sx == 1, [0]),
            (sy.isnull(), [0, 1]),
            (q1 == 'AAA', [0]),
            (q1 < 'B', [0]),
            (q1 <= 'B', [0, 2]),
            (q2 == 1.0, [0]),

            (q1.isnull(), [1]),
            (q2.isnull(), [1]),

            (sx == 2, [2]),
            (sy == 3, [2]),
            (q1 == 'B', [2]),
            (q2 == 2.0, [2, 3]),

            (sx.isnull(), [1, 3]),
            (sy == 4, [3]),
            (q1 == 'CC', [3]),
            (q1 > 'B', [3]),
            (q1 >= 'B', [2, 3]),

            ((sx == 1) | (sx == 2), [0, 2]),
            ((sx.isnull()) & (sy.isnull()), [1]),
        ]
        for predicate, row_indices in cases:
            assert t.select(predicate=predicate).read_all() == expected.take(row_indices)
|
vastdb/tests/test_tables.py
CHANGED
|
@@ -81,6 +81,19 @@ def test_insert_wide_row(session, clean_bucket_name):
|
|
|
81
81
|
assert actual == expected
|
|
82
82
|
|
|
83
83
|
|
|
84
|
+
def test_multi_batch_table(session, clean_bucket_name):
    """Round-trip a table built from three single-row record batches."""
    columns = pa.schema([pa.field('s', pa.utf8())])
    batches = [
        pa.record_batch(schema=columns, data=[[value]])
        for value in ('a', 'b', 'c')
    ]
    expected = pa.Table.from_batches(batches)

    with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
        assert t.select().read_all() == expected
|
|
95
|
+
|
|
96
|
+
|
|
84
97
|
def test_insert_empty(session, clean_bucket_name):
|
|
85
98
|
columns = pa.schema([('a', pa.int8()), ('b', pa.float32())])
|
|
86
99
|
data = [[None] * 5, [None] * 5]
|
vastdb/transaction.py
CHANGED
|
@@ -10,8 +10,6 @@ import logging
|
|
|
10
10
|
from dataclasses import dataclass
|
|
11
11
|
from typing import TYPE_CHECKING, Iterable, Optional
|
|
12
12
|
|
|
13
|
-
import botocore
|
|
14
|
-
|
|
15
13
|
from . import bucket, errors, schema, session
|
|
16
14
|
|
|
17
15
|
if TYPE_CHECKING:
|
|
@@ -64,12 +62,10 @@ class Transaction:
|
|
|
64
62
|
def bucket(self, name: str) -> "Bucket":
|
|
65
63
|
"""Return a VAST Bucket, if exists."""
|
|
66
64
|
try:
|
|
67
|
-
self._rpc.
|
|
68
|
-
except
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
raise errors.MissingBucket(name) from e
|
|
72
|
-
raise
|
|
65
|
+
self._rpc.api.head_bucket(name)
|
|
66
|
+
except errors.NotFound as e:
|
|
67
|
+
raise errors.MissingBucket(name) from e
|
|
68
|
+
|
|
73
69
|
return bucket.Bucket(name, self)
|
|
74
70
|
|
|
75
71
|
def catalog_snapshots(self) -> Iterable["Bucket"]:
|
vastdb/util.py
CHANGED
|
@@ -114,6 +114,11 @@ def iter_serialized_slices(batch: Union[pa.RecordBatch, pa.Table], max_rows_per_
|
|
|
114
114
|
|
|
115
115
|
def serialize_record_batch(batch: Union[pa.RecordBatch, pa.Table]):
|
|
116
116
|
"""Serialize a RecordBatch using Arrow IPC format."""
|
|
117
|
+
if isinstance(batch, pa.Table):
|
|
118
|
+
if len(batch.to_batches()) > 1:
|
|
119
|
+
# the server expects a single RecordBatch per request
|
|
120
|
+
batch = batch.combine_chunks()
|
|
121
|
+
|
|
117
122
|
sink = pa.BufferOutputStream()
|
|
118
123
|
with pa.ipc.new_stream(sink, batch.schema) as writer:
|
|
119
124
|
writer.write(batch)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: vastdb
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.1.1
|
|
4
4
|
Summary: VAST Data SDK
|
|
5
5
|
Home-page: https://github.com/vast-data/vastdb_sdk
|
|
6
6
|
Author: VAST DATA
|
|
@@ -19,13 +19,12 @@ Requires-Python: >=3.9.0
|
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
20
|
License-File: LICENSE
|
|
21
21
|
Requires-Dist: aws-requests-auth
|
|
22
|
-
Requires-Dist: boto3
|
|
23
22
|
Requires-Dist: flatbuffers
|
|
24
|
-
Requires-Dist: ibis-framework
|
|
23
|
+
Requires-Dist: ibis-framework==9.0.0
|
|
25
24
|
Requires-Dist: pyarrow
|
|
26
25
|
Requires-Dist: requests
|
|
27
26
|
Requires-Dist: xmltodict
|
|
28
|
-
Requires-Dist: backoff
|
|
27
|
+
Requires-Dist: backoff==2.2.1
|
|
29
28
|
|
|
30
29
|
|
|
31
30
|
`vastdb` is a Python-based SDK designed for interacting
|
|
@@ -149,36 +149,61 @@ vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI
|
|
|
149
149
|
vast_flatbuf/tabular/VipRange.py,sha256=_BJd1RRZAcK76T9vlsHzXKYVsPVaz6WTEAqStMQCAUQ,2069
|
|
150
150
|
vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
151
151
|
vastdb/__init__.py,sha256=J1JjKiFkKC95BHowfh9kJfQFTjRce-QMsc6zF_FfxC0,432
|
|
152
|
-
vastdb/_internal.py,sha256=
|
|
152
|
+
vastdb/_internal.py,sha256=KRFNlB25Ckj4nowVUneC5tBSYXpnIv-MDdw4Vn6KYnc,91255
|
|
153
153
|
vastdb/bucket.py,sha256=5KuKhPjZOevznZqWHDVVocejvAy7dcwobPuV6BJCfPc,2544
|
|
154
154
|
vastdb/config.py,sha256=1tMYtzKXerGcIUjH4tIGEvZNWvO4fviCEdcNCnELJZo,2269
|
|
155
|
-
vastdb/conftest.py,sha256=
|
|
155
|
+
vastdb/conftest.py,sha256=X2kVveySPQYZlVBXUMoo7Oea5IsvmJzjdqq3fpH2kVw,3469
|
|
156
156
|
vastdb/errors.py,sha256=2XR1ko7J5nkfiHSAgwuVAADw0SsyqxOwSeFaGgKZEXM,4186
|
|
157
157
|
vastdb/features.py,sha256=DxV746LSkORwVSD6MP2hdXRfnyoLkJwtOwGmp1dnquo,1322
|
|
158
158
|
vastdb/schema.py,sha256=X7IRrogXH7Z0kes-DsDh1bRqIhvjH6owlFigGBXy7XQ,5913
|
|
159
|
-
vastdb/session.py,sha256=
|
|
160
|
-
vastdb/table.py,sha256=
|
|
161
|
-
vastdb/transaction.py,sha256=
|
|
162
|
-
vastdb/util.py,sha256=
|
|
159
|
+
vastdb/session.py,sha256=toMR0BXwTaECdWDKnIZky1F3MA1SmelRBiqCrqQ3GCM,2067
|
|
160
|
+
vastdb/table.py,sha256=XLNAwlRjT9sE8nPzYoBC2ehdYpjbit0gLyG0ieNvVfs,31094
|
|
161
|
+
vastdb/transaction.py,sha256=NlVkEowJ_pmtffjWBBDaKExYDKPekjSZyj_fK_bZPJE,3026
|
|
162
|
+
vastdb/util.py,sha256=eunfTuqbCrqQEFZEO9T15N-Bu8Fqpw7Zlqp2TAGfYaY,5870
|
|
163
163
|
vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
164
164
|
vastdb/bench/test_perf.py,sha256=gZIqfHva6lNFpD-9bHAe7M8COBjUyrPkHu3E7F8J2L0,1072
|
|
165
|
-
vastdb/bench/test_sample.py,sha256=
|
|
165
|
+
vastdb/bench/test_sample.py,sha256=LgF4syzij09sH3Noiv1EyCAJ9pvrUE5bxR4RJTVEYag,7881
|
|
166
|
+
vastdb/bench/perf_bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
167
|
+
vastdb/bench/perf_bench/cli.py,sha256=NtaPEBTDI6PWgEtwI1wVbwmUeA5bwGqAj_Z_2lDJ28I,5931
|
|
168
|
+
vastdb/bench/perf_bench/run.py,sha256=2dKooybt7e8VqM7FzY1jrIu6wZYQuoAF9o7AHVh-WVQ,2632
|
|
169
|
+
vastdb/bench/perf_bench/bench_repo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
170
|
+
vastdb/bench/perf_bench/bench_repo/mega_combo.py,sha256=9d8quB3aO6WLNlcIGUL1vmNo3KqyJgEiXSnbQOCiK-U,2780
|
|
171
|
+
vastdb/bench/perf_bench/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
172
|
+
vastdb/bench/perf_bench/common/constants.py,sha256=HI6qQSrqkrqC3jjq3LcOKth6o0yXq1yIOEbp9Oqo1jA,2905
|
|
173
|
+
vastdb/bench/perf_bench/common/log_utils.py,sha256=V0Y6n2kOGTPMK2zPZMMnYwEFvmgOzMZQzk0uu4r-O3o,2015
|
|
174
|
+
vastdb/bench/perf_bench/common/types.py,sha256=tbrJ-gvFGcq-U2Ny89BVxM1ItGZtIxrubgYImXOHDuc,1085
|
|
175
|
+
vastdb/bench/perf_bench/common/utils.py,sha256=c6NoPMVwIQqIg2uDd4LtJIW7Hlis9QvlfbIyJc9sGS4,6280
|
|
176
|
+
vastdb/bench/perf_bench/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
177
|
+
vastdb/bench/perf_bench/dataset/generate_secmaster.py,sha256=zm0b6MIN_hQM7znb-GA0T7L316t8DPixeN7ptdNRLO8,2718
|
|
178
|
+
vastdb/bench/perf_bench/dataset/generate_stocks_dataset.py,sha256=WiYTK31gcaygrzF6Fbldlf7dwod4tioG4XSTv0fSxmc,8486
|
|
179
|
+
vastdb/bench/perf_bench/dataset/schemas.py,sha256=CvCAxCWHsWdI6jE9on2Mm6b0NTDhZXRIbfP61vMNVow,2575
|
|
180
|
+
vastdb/bench/perf_bench/dataset/secmaster.py,sha256=Y3yt8B_RsFvGlhMWKvDqax31UV_ShxZM-7CJO4YmxL0,188169
|
|
181
|
+
vastdb/bench/perf_bench/orchestrate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
|
+
vastdb/bench/perf_bench/orchestrate/bench_spec.py,sha256=XMUVlKPyWuhkRkz_Z2-iKYxqDlMj0EKcA3N46MLIX2c,2469
|
|
183
|
+
vastdb/bench/perf_bench/orchestrate/results_helpers.py,sha256=u-GeDjbdKZLhAfDdecg9xYe7mnbJ0q_xu3xJ8rrRWG0,4172
|
|
184
|
+
vastdb/bench/perf_bench/orchestrate/scenario.py,sha256=DUsIWyVmoLyYbKqPcLpd4veNHVbJsmL9JE-RTdmmyiw,3482
|
|
185
|
+
vastdb/bench/perf_bench/orchestrate/scenario_generator.py,sha256=Plnij1hHqwmMndYpG4EA6L-HNAMJUB-M6-KXm3KZtlk,5978
|
|
186
|
+
vastdb/bench/perf_bench/query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
187
|
+
vastdb/bench/perf_bench/query/arrow_common.py,sha256=eTQx_-ChEJVhP3g3HdQIP4vjIQrogPNh9UZ8nl9P5ew,1821
|
|
188
|
+
vastdb/bench/perf_bench/query/query.py,sha256=h98Ui6vUTw16LHMY0ufkLnyVO3QCR8f1cXUU8N6B2mE,1185
|
|
189
|
+
vastdb/bench/perf_bench/query/query_pyarrow.py,sha256=Dj5YPUvb4dAj7RskHfJcPijJnM-rdYIAItEF2dp4jfo,2305
|
|
190
|
+
vastdb/bench/perf_bench/query/query_vastdb.py,sha256=SZYem_EmsaynEftAa_VFobjSJZDAcli9BckyRS3SFvg,2810
|
|
166
191
|
vastdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
167
192
|
vastdb/tests/metrics.py,sha256=N6ELJUmZubhAMmUtDbisXr6TFhSDgVCTTU05gBVxHRA,1010
|
|
168
193
|
vastdb/tests/test_duckdb.py,sha256=STw_1PwTQR8Naz6s0p6lQTV1ZTKKhe3LPBUbhqzTCu0,1880
|
|
169
194
|
vastdb/tests/test_imports.py,sha256=xKub3-bisFjH0BsZM8COfiUWuMrtoOoQKprF6VQT9RI,5669
|
|
170
|
-
vastdb/tests/test_nested.py,sha256=
|
|
195
|
+
vastdb/tests/test_nested.py,sha256=LPU6uV3Ri23dBzAEMFQqRPbqapV5LfmiHSHkhILPIY0,6332
|
|
171
196
|
vastdb/tests/test_projections.py,sha256=3y1kubwVrzO-xoR0hyps7zrjOJI8niCYspaFTN16Q9w,4540
|
|
172
197
|
vastdb/tests/test_sanity.py,sha256=oiV2gb05aPyG5RMNUQZlyjNlg3T7Fig1_8OJzpAgcsk,3038
|
|
173
198
|
vastdb/tests/test_schemas.py,sha256=l70YQMlx2UL1KRQhApriiG2ZM7GJF-IzWU31H3Yqn1U,3312
|
|
174
|
-
vastdb/tests/test_tables.py,sha256=
|
|
199
|
+
vastdb/tests/test_tables.py,sha256=3YYytrcPZiflFCM6yZPOBI3pz5iXKy0sQbmUDFRVmdA,32000
|
|
175
200
|
vastdb/tests/test_util.py,sha256=Ok_sAEBJsRGF5Voa_v5eu3eAd52GWu8jMjjQbadwW-s,1260
|
|
176
201
|
vastdb/tests/util.py,sha256=dpRJYbboDnlqL4qIdvScpp8--5fxRUBIcIYitrfcj9o,555
|
|
177
202
|
vastdb/vast_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
178
203
|
vastdb/vast_tests/test_ha.py,sha256=744P4G6VJ09RIkHhMQL4wlipCBJWQVMhyvUrSc4k1HQ,975
|
|
179
204
|
vastdb/vast_tests/test_scale.py,sha256=EpjCJmVAQrNBxVnHGJ-KHCoxevhqOcyqYFPMIIY9s60,2714
|
|
180
|
-
vastdb-1.
|
|
181
|
-
vastdb-1.
|
|
182
|
-
vastdb-1.
|
|
183
|
-
vastdb-1.
|
|
184
|
-
vastdb-1.
|
|
205
|
+
vastdb-1.1.1.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
|
|
206
|
+
vastdb-1.1.1.dist-info/METADATA,sha256=C6KtrB0iOL3E8noTM0UqjYvurcant8lSNjwukCXnXTE,1340
|
|
207
|
+
vastdb-1.1.1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
208
|
+
vastdb-1.1.1.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
|
|
209
|
+
vastdb-1.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|