vectordb-bench 0.0.28__py3-none-any.whl → 0.0.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +3 -1
- vectordb_bench/backend/clients/__init__.py +16 -0
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +180 -15
- vectordb_bench/backend/clients/aws_opensearch/cli.py +51 -21
- vectordb_bench/backend/clients/aws_opensearch/config.py +37 -14
- vectordb_bench/backend/clients/clickhouse/cli.py +1 -0
- vectordb_bench/backend/clients/clickhouse/clickhouse.py +3 -3
- vectordb_bench/backend/clients/clickhouse/config.py +2 -2
- vectordb_bench/backend/clients/lancedb/cli.py +62 -8
- vectordb_bench/backend/clients/lancedb/config.py +14 -1
- vectordb_bench/backend/clients/lancedb/lancedb.py +21 -3
- vectordb_bench/backend/clients/memorydb/memorydb.py +2 -2
- vectordb_bench/backend/clients/milvus/cli.py +30 -9
- vectordb_bench/backend/clients/milvus/config.py +2 -0
- vectordb_bench/backend/clients/milvus/milvus.py +7 -1
- vectordb_bench/backend/clients/qdrant_cloud/cli.py +43 -0
- vectordb_bench/backend/clients/qdrant_cloud/config.py +4 -4
- vectordb_bench/backend/clients/qdrant_local/cli.py +60 -0
- vectordb_bench/backend/clients/qdrant_local/config.py +47 -0
- vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +232 -0
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +29 -3
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -0
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +5 -0
- vectordb_bench/backend/runner/mp_runner.py +16 -5
- vectordb_bench/backend/task_runner.py +1 -0
- vectordb_bench/cli/batch_cli.py +121 -0
- vectordb_bench/cli/cli.py +13 -2
- vectordb_bench/cli/vectordbbench.py +6 -0
- vectordb_bench/config-files/batch_sample_config.yml +17 -0
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +10 -4
- vectordb_bench/frontend/config/dbCaseConfigs.py +113 -1
- vectordb_bench/models.py +13 -0
- {vectordb_bench-0.0.28.dist-info → vectordb_bench-0.0.30.dist-info}/METADATA +56 -5
- {vectordb_bench-0.0.28.dist-info → vectordb_bench-0.0.30.dist-info}/RECORD +38 -32
- {vectordb_bench-0.0.28.dist-info → vectordb_bench-0.0.30.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.28.dist-info → vectordb_bench-0.0.30.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.28.dist-info → vectordb_bench-0.0.30.dist-info}/licenses/LICENSE +0 -0
- {vectordb_bench-0.0.28.dist-info → vectordb_bench-0.0.30.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,232 @@
|
|
1
|
+
"""Wrapper around the Qdrant client that implements the VectorDB interface."""
|
2
|
+
|
3
|
+
import logging
|
4
|
+
import time
|
5
|
+
from collections.abc import Iterable
|
6
|
+
from contextlib import contextmanager
|
7
|
+
|
8
|
+
from qdrant_client import QdrantClient
|
9
|
+
from qdrant_client.http.models import (
|
10
|
+
Batch,
|
11
|
+
CollectionStatus,
|
12
|
+
FieldCondition,
|
13
|
+
Filter,
|
14
|
+
HnswConfigDiff,
|
15
|
+
OptimizersConfigDiff,
|
16
|
+
PayloadSchemaType,
|
17
|
+
Range,
|
18
|
+
SearchParams,
|
19
|
+
VectorParams,
|
20
|
+
)
|
21
|
+
|
22
|
+
from ..api import VectorDB
|
23
|
+
from .config import QdrantLocalIndexConfig
|
24
|
+
|
25
|
+
log = logging.getLogger(__name__)
|
26
|
+
|
27
|
+
SECONDS_WAITING_FOR_INDEXING_API_CALL = 5
|
28
|
+
QDRANT_BATCH_SIZE = 100
|
29
|
+
|
30
|
+
|
31
|
+
def qdrant_collection_exists(client: QdrantClient, collection_name: str) -> bool:
    """Report whether ``collection_name`` can be fetched through ``client``.

    The probe is ``client.get_collection``; any exception it raises is
    interpreted as "the collection is not there" and is not propagated.

    Args:
        client: Qdrant client used for the lookup.
        collection_name: Name of the collection to probe.

    Returns:
        bool: True when the lookup succeeds, False when it raises.
    """
    try:
        client.get_collection(collection_name)
    except Exception:
        # Every failure mode is mapped to "missing"; callers only get a bool.
        return False
    return True
|
40
|
+
|
41
|
+
|
42
|
+
class QdrantLocal(VectorDB):
    """VectorDB implementation that talks to Qdrant through ``QdrantClient``.

    No client is kept on the instance between calls: ``__init__`` uses a
    temporary client to prepare the collection, and ``init()`` provides the
    client used by ``optimize``, ``insert_embeddings`` and ``search_embedding``.
    """

    def __init__(
        self,
        dim: int,
        db_config: dict,
        db_case_config: QdrantLocalIndexConfig,
        collection_name: str = "QdrantLocalCollection",
        drop_old: bool = False,
        name: str = "QdrantLocal",
        **kwargs,
    ):
        """Prepare the collection used by the benchmark.

        Args:
            dim: Dimension of the vectors stored in the collection.
            db_config: Keyword arguments forwarded verbatim to ``QdrantClient``.
            db_case_config: Source of the index and search parameters.
            collection_name: Name of the collection to create/use.
            drop_old: When True, an existing collection with the same name is
                deleted before (re)creation.
            name: Label used in log messages.
            **kwargs: Accepted and ignored.
        """
        self.name = name
        self.db_config = db_config
        self.case_config = db_case_config
        self.search_parameter = self.case_config.search_param()
        self.collection_name = collection_name
        # The working client only exists inside ``init()``.
        self.client = None

        self._primary_field = "pk"
        self._vector_field = "vector"

        # Temporary client used only for the setup below.
        client = QdrantClient(**self.db_config)

        # Lets just print the parameters here for double check
        log.info(f"Case config: {self.case_config.index_param()}")
        log.info(f"Search parameter: {self.search_parameter}")

        if drop_old and qdrant_collection_exists(client, self.collection_name):
            log.info(f"{self.name} client drop_old collection: {self.collection_name}")
            client.delete_collection(self.collection_name)

        if not qdrant_collection_exists(client, self.collection_name):
            log.info(f"{self.name} create collection: {self.collection_name}")
            self._create_collection(dim, client)

    @contextmanager
    def init(self):
        """Provide ``self.client`` for the duration of the ``with`` block.

        Examples:
            >>> with self.init():
            >>>     self.insert_embeddings()
            >>>     self.search_embedding()
        """
        # create connection
        self.client = QdrantClient(**self.db_config)
        try:
            yield
        finally:
            # Always drop the client, even when the body raised. The attribute
            # is reset (not deleted) so the ``assert self.client`` guards in the
            # other methods keep working after the context has exited.
            self.client = None

    def _create_collection(self, dim: int, qdrant_client: QdrantClient):
        """Create the collection and an integer payload index on the primary field.

        Args:
            dim: Vector size passed to ``VectorParams``.
            qdrant_client: Client used to issue the creation calls.

        Raises:
            Exception: Any creation error whose message does not contain
                ``"already exists!"`` is logged and re-raised.
        """
        # Read the index parameters once instead of once per field.
        index_param = self.case_config.index_param()

        log.info(f"Create collection: {self.collection_name}")
        log.info(
            f"Index parameters: m={index_param['m']}, "
            f"ef_construct={index_param['ef_construct']}, "
            f"on_disk={index_param['on_disk']}"
        )

        # If the on_disk is true, we enable both on disk index and vectors.
        try:
            qdrant_client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(
                    size=dim,
                    distance=index_param["distance"],
                    on_disk=index_param["on_disk"],
                ),
                hnsw_config=HnswConfigDiff(
                    m=index_param["m"],
                    ef_construct=index_param["ef_construct"],
                    on_disk=index_param["on_disk"],
                ),
            )

            qdrant_client.create_payload_index(
                collection_name=self.collection_name,
                field_name=self._primary_field,
                field_schema=PayloadSchemaType.INTEGER,
            )

        except Exception as e:
            # An "already exists!" error is treated as success.
            if "already exists!" in str(e):
                return
            log.warning(f"Failed to create collection: {self.collection_name} error: {e}")
            raise e from None

    def optimize(self, data_size: int | None = None):
        """Block until the collection status is ``CollectionStatus.GREEN``.

        Args:
            data_size: Not used by this implementation.

        Raises:
            Exception: Any error raised while polling is logged and re-raised.
        """
        assert self.client, "Please call self.init() before"
        # wait for vectors to be fully indexed
        try:
            while True:
                # Sleep first, then fetch, so the status being checked is
                # never older than the call that returned it.
                time.sleep(SECONDS_WAITING_FOR_INDEXING_API_CALL)
                info = self.client.get_collection(self.collection_name)
                if info.status != CollectionStatus.GREEN:
                    continue
                log.info(f"Finishing building index for collection: {self.collection_name}")
                msg = (
                    f"Stored vectors: {info.vectors_count}, Indexed vectors: {info.indexed_vectors_count}, "
                    f"Collection status: {info.status}"
                )
                log.info(msg)
                return

        except Exception as e:
            log.warning(f"QdrantLocal ready to search error: {e}")
            raise e from None

    def insert_embeddings(
        self,
        embeddings: Iterable[list[float]],
        metadata: list[int],
        **kwargs,
    ) -> tuple[int, Exception | None]:
        """Insert embeddings into the database.

        Points are upserted in slices of ``QDRANT_BATCH_SIZE``; each metadata
        value is used both as the point id and as the primary-field payload.

        Args:
            embeddings(list[list[float]]): list of embeddings
            metadata(list[int]): list of metadata, same length as ``embeddings``
            kwargs: other arguments (ignored)

        Returns:
            tuple[int, Exception | None]: number of embeddings actually
            inserted, and the exception that interrupted insertion if any.
        """
        assert self.client is not None
        assert len(embeddings) == len(metadata)
        insert_count = 0

        # disable indexing for quick insertion
        self.client.update_collection(
            collection_name=self.collection_name,
            optimizer_config=OptimizersConfigDiff(indexing_threshold=0),
        )
        try:
            for offset in range(0, len(embeddings), QDRANT_BATCH_SIZE):
                vectors = embeddings[offset : offset + QDRANT_BATCH_SIZE]
                ids = metadata[offset : offset + QDRANT_BATCH_SIZE]
                payloads = [{self._primary_field: v} for v in ids]
                _ = self.client.upsert(
                    collection_name=self.collection_name,
                    wait=True,
                    points=Batch(ids=ids, payloads=payloads, vectors=vectors),
                )
                # Count what was really sent: the final slice may be shorter
                # than QDRANT_BATCH_SIZE.
                insert_count += len(ids)
            # enable indexing after insertion
            self.client.update_collection(
                collection_name=self.collection_name,
                optimizer_config=OptimizersConfigDiff(indexing_threshold=100),
            )

        except Exception as e:
            log.warning(f"Failed to insert data, {e}")
            return insert_count, e
        else:
            return insert_count, None

    def search_embedding(
        self,
        query: list[float],
        k: int = 100,
        filters: dict | None = None,
        timeout: int | None = None,
    ) -> list[int]:
        """Run a nearest-neighbour query and return the ids of the hits.

        Should call self.init() first.

        Args:
            query: Query vector.
            k: Maximum number of points to return.
            filters: When truthy, its ``"id"`` entry is used as an exclusive
                lower bound (``gt``) on the primary-field payload.
            timeout: Accepted for interface compatibility; not used here.

        Returns:
            list[int]: ids of the returned points, in the order Qdrant
            returned them.
        """
        assert self.client is not None

        query_filter = None
        if filters:
            query_filter = Filter(
                must=[
                    FieldCondition(
                        key=self._primary_field,
                        range=Range(
                            gt=filters.get("id"),
                        ),
                    ),
                ],
            )
        res = self.client.query_points(
            collection_name=self.collection_name,
            query=query,
            limit=k,
            query_filter=query_filter,
            search_params=SearchParams(**self.search_parameter),
        ).points

        return [result.id for result in res]
|
@@ -15,12 +15,33 @@ from .. import DB
|
|
15
15
|
class WeaviateTypedDict(CommonTypedDict):
|
16
16
|
api_key: Annotated[
|
17
17
|
str,
|
18
|
-
click.option("--api-key", type=str, help="Weaviate api key", required=
|
18
|
+
click.option("--api-key", type=str, help="Weaviate api key", required=False, default=""),
|
19
19
|
]
|
20
20
|
url: Annotated[
|
21
21
|
str,
|
22
22
|
click.option("--url", type=str, help="Weaviate url", required=True),
|
23
23
|
]
|
24
|
+
no_auth: Annotated[
|
25
|
+
bool,
|
26
|
+
click.option(
|
27
|
+
"--no-auth",
|
28
|
+
is_flag=True,
|
29
|
+
help="Do not use api-key, set it to true if you are using a local setup. Default is False.",
|
30
|
+
default=False,
|
31
|
+
),
|
32
|
+
]
|
33
|
+
m: Annotated[
|
34
|
+
int,
|
35
|
+
click.option("--m", type=int, default=16, help="HNSW index parameter m."),
|
36
|
+
]
|
37
|
+
ef_construct: Annotated[
|
38
|
+
int,
|
39
|
+
click.option("--ef-construction", type=int, default=256, help="HNSW index parameter ef_construction"),
|
40
|
+
]
|
41
|
+
ef: Annotated[
|
42
|
+
int,
|
43
|
+
click.option("--ef", type=int, default=256, help="HNSW index parameter ef for search"),
|
44
|
+
]
|
24
45
|
|
25
46
|
|
26
47
|
@cli.command()
|
@@ -32,9 +53,14 @@ def Weaviate(**parameters: Unpack[WeaviateTypedDict]):
|
|
32
53
|
db=DB.WeaviateCloud,
|
33
54
|
db_config=WeaviateConfig(
|
34
55
|
db_label=parameters["db_label"],
|
35
|
-
api_key=SecretStr(parameters["api_key"]),
|
56
|
+
api_key=SecretStr(parameters["api_key"]) if parameters["api_key"] != "" else SecretStr("-"),
|
36
57
|
url=SecretStr(parameters["url"]),
|
58
|
+
no_auth=parameters["no_auth"],
|
59
|
+
),
|
60
|
+
db_case_config=WeaviateIndexConfig(
|
61
|
+
efConstruction=parameters["ef_construction"],
|
62
|
+
maxConnections=parameters["m"],
|
63
|
+
ef=parameters["ef"],
|
37
64
|
),
|
38
|
-
db_case_config=WeaviateIndexConfig(ef=256, efConstruction=256, maxConnections=16),
|
39
65
|
**parameters,
|
40
66
|
)
|
@@ -6,11 +6,13 @@ from ..api import DBCaseConfig, DBConfig, MetricType
|
|
6
6
|
class WeaviateConfig(DBConfig):
|
7
7
|
url: SecretStr
|
8
8
|
api_key: SecretStr
|
9
|
+
no_auth: bool | None = False
|
9
10
|
|
10
11
|
def to_dict(self) -> dict:
|
11
12
|
return {
|
12
13
|
"url": self.url.get_secret_value(),
|
13
14
|
"auth_client_secret": self.api_key.get_secret_value(),
|
15
|
+
"no_auth": self.no_auth,
|
14
16
|
}
|
15
17
|
|
16
18
|
|
@@ -38,6 +38,11 @@ class WeaviateCloud(VectorDB):
|
|
38
38
|
self._vector_field = "vector"
|
39
39
|
self._index_name = "vector_idx"
|
40
40
|
|
41
|
+
# If a local (no-auth) setup is used, drop the API key so the client is created without authentication.
|
42
|
+
if db_config["no_auth"]:
|
43
|
+
del db_config["auth_client_secret"]
|
44
|
+
del db_config["no_auth"]
|
45
|
+
|
41
46
|
from weaviate import Client
|
42
47
|
|
43
48
|
client = Client(**db_config)
|
@@ -5,10 +5,12 @@ import random
|
|
5
5
|
import time
|
6
6
|
import traceback
|
7
7
|
from collections.abc import Iterable
|
8
|
+
from multiprocessing.queues import Queue
|
8
9
|
|
9
10
|
import numpy as np
|
10
11
|
|
11
12
|
from ... import config
|
13
|
+
from ...models import ConcurrencySlotTimeoutError
|
12
14
|
from ..clients import api
|
13
15
|
|
14
16
|
NUM_PER_BATCH = config.NUM_PER_BATCH
|
@@ -28,16 +30,18 @@ class MultiProcessingSearchRunner:
|
|
28
30
|
self,
|
29
31
|
db: api.VectorDB,
|
30
32
|
test_data: list[list[float]],
|
31
|
-
k: int =
|
33
|
+
k: int = config.K_DEFAULT,
|
32
34
|
filters: dict | None = None,
|
33
35
|
concurrencies: Iterable[int] = config.NUM_CONCURRENCY,
|
34
|
-
duration: int =
|
36
|
+
duration: int = config.CONCURRENCY_DURATION,
|
37
|
+
concurrency_timeout: int = config.CONCURRENCY_TIMEOUT,
|
35
38
|
):
|
36
39
|
self.db = db
|
37
40
|
self.k = k
|
38
41
|
self.filters = filters
|
39
42
|
self.concurrencies = concurrencies
|
40
43
|
self.duration = duration
|
44
|
+
self.concurrency_timeout = concurrency_timeout
|
41
45
|
|
42
46
|
self.test_data = test_data
|
43
47
|
log.debug(f"test dataset columns: {len(test_data)}")
|
@@ -114,9 +118,7 @@ class MultiProcessingSearchRunner:
|
|
114
118
|
log.info(f"Start search {self.duration}s in concurrency {conc}, filters: {self.filters}")
|
115
119
|
future_iter = [executor.submit(self.search, self.test_data, q, cond) for i in range(conc)]
|
116
120
|
# Sync all processes
|
117
|
-
|
118
|
-
sleep_t = conc if conc < 10 else 10
|
119
|
-
time.sleep(sleep_t)
|
121
|
+
self._wait_for_queue_fill(q, size=conc)
|
120
122
|
|
121
123
|
with cond:
|
122
124
|
cond.notify_all()
|
@@ -160,6 +162,15 @@ class MultiProcessingSearchRunner:
|
|
160
162
|
conc_latency_avg_list,
|
161
163
|
)
|
162
164
|
|
165
|
+
def _wait_for_queue_fill(self, q: Queue, size: int):
    """Block until ``q.qsize()`` reaches ``size``.

    The queue is polled every ``min(size, 10)`` seconds. The interval is
    added to the running total *before* sleeping, so the timeout fires as
    soon as the next sleep would push the total past
    ``self.concurrency_timeout``. A timeout that is zero or negative
    disables the check.

    Args:
        q: Queue whose ``qsize()`` is polled.
        size: Number of queued items to wait for.

    Raises:
        ConcurrencySlotTimeoutError: When the accumulated wait exceeds a
            positive ``self.concurrency_timeout``.
    """
    poll_interval = min(size, 10)
    waited = 0
    while q.qsize() < size:
        waited += poll_interval
        if waited > self.concurrency_timeout > 0:
            raise ConcurrencySlotTimeoutError
        time.sleep(poll_interval)
|
173
|
+
|
163
174
|
def run(self) -> float:
|
164
175
|
"""
|
165
176
|
Returns:
|
@@ -275,6 +275,7 @@ class CaseRunner(BaseModel):
|
|
275
275
|
filters=self.ca.filters,
|
276
276
|
concurrencies=self.config.case_config.concurrency_search_config.num_concurrency,
|
277
277
|
duration=self.config.case_config.concurrency_search_config.concurrency_duration,
|
278
|
+
concurrency_timeout=self.config.case_config.concurrency_search_config.concurrency_timeout,
|
278
279
|
k=self.config.case_config.k,
|
279
280
|
)
|
280
281
|
|
@@ -0,0 +1,121 @@
|
|
1
|
+
import logging
|
2
|
+
import time
|
3
|
+
from collections.abc import MutableMapping
|
4
|
+
from concurrent.futures import wait
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import Annotated, Any, TypedDict
|
7
|
+
|
8
|
+
import click
|
9
|
+
from click.testing import CliRunner
|
10
|
+
from yaml import Loader, load
|
11
|
+
|
12
|
+
from .. import config
|
13
|
+
from ..cli.cli import (
|
14
|
+
cli,
|
15
|
+
click_parameter_decorators_from_typed_dict,
|
16
|
+
)
|
17
|
+
|
18
|
+
log = logging.getLogger(__name__)
|
19
|
+
|
20
|
+
|
21
|
+
def click_get_defaults_from_file(ctx, param, value):  # noqa: ANN001, ARG001
    """Click callback that loads a YAML batch config into ``ctx.default_map``.

    ``value`` is used as given when that path exists; otherwise it is
    resolved relative to ``config.CONFIG_LOCAL_DIR``.

    Args:
        ctx: Click context whose ``default_map`` receives the parsed config.
        param: The Click parameter being processed (unused).
        value: Path of the YAML file supplied on the command line.

    Returns:
        The unchanged ``value``.

    Raises:
        click.MissingParameter: When ``value`` is empty.
        click.BadParameter: When the file cannot be read or parsed, or when
            its top-level YAML node is not a mapping.
    """
    if not value:
        raise click.MissingParameter
    path = Path(value)
    input_file = path if path.exists() else Path(config.CONFIG_LOCAL_DIR, path)
    try:
        with input_file.open() as f:
            # NOTE(security): yaml.load with the full ``Loader`` can construct
            # arbitrary Python objects; only point this at trusted files.
            _config: dict[str, list[dict[str, Any]]] = load(f.read(), Loader=Loader)  # noqa: S506
    except Exception as e:
        msg = f"Failed to load batch config file: {e}"
        raise click.BadParameter(msg) from e

    # An empty file parses to None and a top-level list parses to list; both
    # would only fail later when the batch command iterates the config.
    if not isinstance(_config, dict):
        msg = (
            "Failed to load batch config file: expected a top-level mapping of "
            f"sub-commands, got {type(_config).__name__}"
        )
        raise click.BadParameter(msg)

    ctx.default_map = _config
    return value
|
34
|
+
|
35
|
+
|
36
|
+
class BatchCliTypedDict(TypedDict):
    """Click option declarations for the batch command."""

    # Path of the YAML batch file. The option is eager and handled entirely by
    # ``click_get_defaults_from_file``; ``expose_value=False`` means the value
    # is not passed to the command function. It is a path, hence ``str``.
    batch_config_file: Annotated[
        str,
        click.option(
            "--batch-config-file",
            type=click.Path(),
            callback=click_get_defaults_from_file,
            is_eager=True,
            expose_value=False,
            help="Read batch configuration from yaml file",
        ),
    ]
|
48
|
+
|
49
|
+
|
50
|
+
def build_sub_cmd_args(batch_config: MutableMapping[str, Any] | None):
|
51
|
+
bool_options = {
|
52
|
+
"drop_old": True,
|
53
|
+
"load": True,
|
54
|
+
"search_serial": True,
|
55
|
+
"search_concurrent": True,
|
56
|
+
"dry_run": False,
|
57
|
+
"custom_dataset_use_shuffled": True,
|
58
|
+
"custom_dataset_with_gt": True,
|
59
|
+
}
|
60
|
+
|
61
|
+
def format_option(key: str, value: Any):
|
62
|
+
opt_name = key.replace("_", "-")
|
63
|
+
|
64
|
+
if key in bool_options:
|
65
|
+
return format_bool_option(opt_name, value, skip=False)
|
66
|
+
|
67
|
+
if key.startswith("skip_"):
|
68
|
+
raw_key = key[5:]
|
69
|
+
raw_opt = raw_key.replace("_", "-")
|
70
|
+
return format_bool_option(raw_opt, value, skip=True, raw_key=raw_key)
|
71
|
+
|
72
|
+
return [f"--{opt_name}", str(value)]
|
73
|
+
|
74
|
+
def format_bool_option(opt_name: str, value: Any, skip: bool = False, raw_key: str | None = None):
|
75
|
+
if isinstance(value, bool):
|
76
|
+
if skip:
|
77
|
+
if bool_options.get(raw_key, False):
|
78
|
+
return [f"--skip-{opt_name}"] if value else [f"--{opt_name}"]
|
79
|
+
return [f"--{opt_name}", str(value)]
|
80
|
+
if value:
|
81
|
+
return [f"--{opt_name}"]
|
82
|
+
if bool_options.get(opt_name.replace("-", "_"), False):
|
83
|
+
return [f"--skip-{opt_name}"]
|
84
|
+
return []
|
85
|
+
return [f"--{opt_name}", str(value)]
|
86
|
+
|
87
|
+
args_arr = []
|
88
|
+
for sub_cmd_key, sub_cmd_config_list in batch_config.items():
|
89
|
+
for sub_cmd_args in sub_cmd_config_list:
|
90
|
+
args = [sub_cmd_key]
|
91
|
+
for k, v in sub_cmd_args.items():
|
92
|
+
args.extend(format_option(k, v))
|
93
|
+
args_arr.append(args)
|
94
|
+
|
95
|
+
return args_arr
|
96
|
+
|
97
|
+
|
98
|
+
# Batch command: runs every sub-command described by the loaded batch config,
# one after another, through a Click ``CliRunner``.
# (Documented with comments rather than a docstring so the command's help
# text stays unchanged.)
@cli.command()
@click_parameter_decorators_from_typed_dict(BatchCliTypedDict)
def BatchCli():
    # The eager --batch-config-file callback stored the parsed YAML here.
    sub_cmd_arg_lists = build_sub_cmd_args(click.get_current_context().default_map)
    runner = CliRunner()

    # Log the full plan before anything is executed.
    for sub_cmd_args in sub_cmd_arg_lists:
        log.info(f"got batch config: {' '.join(sub_cmd_args)}")

    for sub_cmd_args in sub_cmd_arg_lists:
        outcome = runner.invoke(cli, sub_cmd_args)
        time.sleep(5)

        # Imported on every pass so the local name is rebound to the value the
        # interface module holds at this moment.
        from ..interface import global_result_future

        # Wait for the run started by this sub-command before moving on.
        if global_result_future:
            wait([global_result_future])

        if outcome.exception:
            log.exception(f"failed to run sub command: {sub_cmd_args[0]}", exc_info=outcome.exception)
|
vectordb_bench/cli/cli.py
CHANGED
@@ -17,10 +17,9 @@ from typing import (
|
|
17
17
|
import click
|
18
18
|
from yaml import load
|
19
19
|
|
20
|
-
from vectordb_bench.backend.clients.api import MetricType
|
21
|
-
|
22
20
|
from .. import config
|
23
21
|
from ..backend.clients import DB
|
22
|
+
from ..backend.clients.api import MetricType
|
24
23
|
from ..interface import benchmark_runner, global_result_future
|
25
24
|
from ..models import (
|
26
25
|
CaseConfig,
|
@@ -303,6 +302,17 @@ class CommonTypedDict(TypedDict):
|
|
303
302
|
callback=lambda *args: list(map(int, click_arg_split(*args))),
|
304
303
|
),
|
305
304
|
]
|
305
|
+
concurrency_timeout: Annotated[
|
306
|
+
int,
|
307
|
+
click.option(
|
308
|
+
"--concurrency-timeout",
|
309
|
+
type=int,
|
310
|
+
default=config.CONCURRENCY_TIMEOUT,
|
311
|
+
show_default=True,
|
312
|
+
help="Timeout (in seconds) to wait for a concurrency slot before failing. "
|
313
|
+
"Set to a negative value to wait indefinitely.",
|
314
|
+
),
|
315
|
+
]
|
306
316
|
custom_case_name: Annotated[
|
307
317
|
str,
|
308
318
|
click.option(
|
@@ -490,6 +500,7 @@ def run(
|
|
490
500
|
concurrency_search_config=ConcurrencySearchConfig(
|
491
501
|
concurrency_duration=parameters["concurrency_duration"],
|
492
502
|
num_concurrency=[int(s) for s in parameters["num_concurrency"]],
|
503
|
+
concurrency_timeout=parameters["concurrency_timeout"],
|
493
504
|
),
|
494
505
|
custom_case=get_custom_case_config(parameters),
|
495
506
|
),
|
@@ -9,12 +9,15 @@ from ..backend.clients.pgdiskann.cli import PgDiskAnn
|
|
9
9
|
from ..backend.clients.pgvecto_rs.cli import PgVectoRSHNSW, PgVectoRSIVFFlat
|
10
10
|
from ..backend.clients.pgvector.cli import PgVectorHNSW
|
11
11
|
from ..backend.clients.pgvectorscale.cli import PgVectorScaleDiskAnn
|
12
|
+
from ..backend.clients.qdrant_cloud.cli import QdrantCloud
|
13
|
+
from ..backend.clients.qdrant_local.cli import QdrantLocal
|
12
14
|
from ..backend.clients.redis.cli import Redis
|
13
15
|
from ..backend.clients.test.cli import Test
|
14
16
|
from ..backend.clients.tidb.cli import TiDB
|
15
17
|
from ..backend.clients.vespa.cli import Vespa
|
16
18
|
from ..backend.clients.weaviate_cloud.cli import Weaviate
|
17
19
|
from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
|
20
|
+
from .batch_cli import BatchCli
|
18
21
|
from .cli import cli
|
19
22
|
|
20
23
|
cli.add_command(PgVectorHNSW)
|
@@ -35,6 +38,9 @@ cli.add_command(TiDB)
|
|
35
38
|
cli.add_command(Clickhouse)
|
36
39
|
cli.add_command(Vespa)
|
37
40
|
cli.add_command(LanceDB)
|
41
|
+
cli.add_command(QdrantCloud)
|
42
|
+
cli.add_command(QdrantLocal)
|
43
|
+
cli.add_command(BatchCli)
|
38
44
|
|
39
45
|
|
40
46
|
if __name__ == "__main__":
|
@@ -0,0 +1,17 @@
|
|
1
|
+
pgvectorhnsw:
|
2
|
+
- db_label: pgConfigTest
|
3
|
+
user_name: vectordbbench
|
4
|
+
db_name: vectordbbench
|
5
|
+
host: localhost
|
6
|
+
m: 16
|
7
|
+
ef_construction: 128
|
8
|
+
ef_search: 128
|
9
|
+
milvushnsw:
|
10
|
+
- skip_search_serial: True
|
11
|
+
case_type: Performance1536D50K
|
12
|
+
uri: http://localhost:19530
|
13
|
+
m: 16
|
14
|
+
ef_construction: 128
|
15
|
+
ef_search: 128
|
16
|
+
drop_old: False
|
17
|
+
load: False
|
@@ -36,21 +36,27 @@ def dbConfigSettingItem(st, activeDb: DB):
|
|
36
36
|
columns = st.columns(DB_CONFIG_SETTING_COLUMNS)
|
37
37
|
|
38
38
|
dbConfigClass = activeDb.config_cls
|
39
|
-
|
39
|
+
schema = dbConfigClass.schema()
|
40
|
+
property_items = schema.get("properties").items()
|
41
|
+
required_fields = set(schema.get("required", []))
|
40
42
|
dbConfig = {}
|
41
43
|
idx = 0
|
42
44
|
|
43
45
|
# db config (unique)
|
44
|
-
for key, property in
|
46
|
+
for key, property in property_items:
|
45
47
|
if key not in dbConfigClass.common_short_configs() and key not in dbConfigClass.common_long_configs():
|
46
48
|
column = columns[idx % DB_CONFIG_SETTING_COLUMNS]
|
47
49
|
idx += 1
|
48
|
-
|
50
|
+
input_value = column.text_input(
|
49
51
|
key,
|
50
|
-
key="
|
52
|
+
key=f"{activeDb.name}-{key}",
|
51
53
|
value=property.get("default", ""),
|
52
54
|
type="password" if inputIsPassword(key) else "default",
|
55
|
+
placeholder="optional" if key not in required_fields else None,
|
53
56
|
)
|
57
|
+
if key in required_fields or input_value:
|
58
|
+
dbConfig[key] = input_value
|
59
|
+
|
54
60
|
# db config (common short labels)
|
55
61
|
for key in dbConfigClass.common_short_configs():
|
56
62
|
column = columns[idx % DB_CONFIG_SETTING_COLUMNS]
|