vectordb-bench 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +3 -1
- vectordb_bench/backend/clients/__init__.py +18 -2
- vectordb_bench/backend/clients/api.py +1 -0
- vectordb_bench/backend/clients/clickhouse/cli.py +1 -0
- vectordb_bench/backend/clients/clickhouse/clickhouse.py +3 -3
- vectordb_bench/backend/clients/clickhouse/config.py +2 -2
- vectordb_bench/backend/clients/lancedb/cli.py +92 -0
- vectordb_bench/backend/clients/lancedb/config.py +103 -0
- vectordb_bench/backend/clients/lancedb/lancedb.py +97 -0
- vectordb_bench/backend/clients/qdrant_cloud/cli.py +43 -0
- vectordb_bench/backend/clients/qdrant_cloud/config.py +4 -4
- vectordb_bench/backend/runner/mp_runner.py +16 -5
- vectordb_bench/backend/task_runner.py +1 -0
- vectordb_bench/cli/cli.py +16 -3
- vectordb_bench/cli/vectordbbench.py +4 -0
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +10 -4
- vectordb_bench/frontend/config/dbCaseConfigs.py +125 -0
- vectordb_bench/frontend/config/styles.py +1 -0
- vectordb_bench/models.py +21 -5
- {vectordb_bench-0.0.27.dist-info → vectordb_bench-0.0.29.dist-info}/METADATA +16 -8
- {vectordb_bench-0.0.27.dist-info → vectordb_bench-0.0.29.dist-info}/RECORD +25 -21
- {vectordb_bench-0.0.27.dist-info → vectordb_bench-0.0.29.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.27.dist-info → vectordb_bench-0.0.29.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.27.dist-info → vectordb_bench-0.0.29.dist-info}/licenses/LICENSE +0 -0
- {vectordb_bench-0.0.27.dist-info → vectordb_bench-0.0.29.dist-info}/top_level.txt +0 -0
vectordb_bench/__init__.py
CHANGED
@@ -6,7 +6,7 @@ import environs
|
|
6
6
|
from . import log_util
|
7
7
|
|
8
8
|
env = environs.Env()
|
9
|
-
env.read_env(".env", False)
|
9
|
+
env.read_env(path=".env", recurse=False)
|
10
10
|
|
11
11
|
|
12
12
|
class config:
|
@@ -52,6 +52,8 @@ class config:
|
|
52
52
|
|
53
53
|
CONCURRENCY_DURATION = 30
|
54
54
|
|
55
|
+
CONCURRENCY_TIMEOUT = 3600
|
56
|
+
|
55
57
|
RESULTS_LOCAL_DIR = env.path(
|
56
58
|
"RESULTS_LOCAL_DIR",
|
57
59
|
pathlib.Path(__file__).parent.joinpath("results"),
|
@@ -45,9 +45,10 @@ class DB(Enum):
|
|
45
45
|
TiDB = "TiDB"
|
46
46
|
Clickhouse = "Clickhouse"
|
47
47
|
Vespa = "Vespa"
|
48
|
+
LanceDB = "LanceDB"
|
48
49
|
|
49
50
|
@property
|
50
|
-
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901
|
51
|
+
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915
|
51
52
|
"""Import while in use"""
|
52
53
|
if self == DB.Milvus:
|
53
54
|
from .milvus.milvus import Milvus
|
@@ -164,11 +165,16 @@ class DB(Enum):
|
|
164
165
|
|
165
166
|
return Vespa
|
166
167
|
|
168
|
+
if self == DB.LanceDB:
|
169
|
+
from .lancedb.lancedb import LanceDB
|
170
|
+
|
171
|
+
return LanceDB
|
172
|
+
|
167
173
|
msg = f"Unknown DB: {self.name}"
|
168
174
|
raise ValueError(msg)
|
169
175
|
|
170
176
|
@property
|
171
|
-
def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901
|
177
|
+
def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901, PLR0915
|
172
178
|
"""Import while in use"""
|
173
179
|
if self == DB.Milvus:
|
174
180
|
from .milvus.config import MilvusConfig
|
@@ -285,6 +291,11 @@ class DB(Enum):
|
|
285
291
|
|
286
292
|
return VespaConfig
|
287
293
|
|
294
|
+
if self == DB.LanceDB:
|
295
|
+
from .lancedb.config import LanceDBConfig
|
296
|
+
|
297
|
+
return LanceDBConfig
|
298
|
+
|
288
299
|
msg = f"Unknown DB: {self.name}"
|
289
300
|
raise ValueError(msg)
|
290
301
|
|
@@ -382,6 +393,11 @@ class DB(Enum):
|
|
382
393
|
|
383
394
|
return VespaHNSWConfig
|
384
395
|
|
396
|
+
if self == DB.LanceDB:
|
397
|
+
from .lancedb.config import _lancedb_case_config
|
398
|
+
|
399
|
+
return _lancedb_case_config.get(index_type)
|
400
|
+
|
385
401
|
# DB.Pinecone, DB.Chroma, DB.Redis
|
386
402
|
return EmptyDBCaseConfig
|
387
403
|
|
@@ -51,6 +51,7 @@ def Clickhouse(**parameters: Unpack[ClickhouseHNSWTypedDict]):
|
|
51
51
|
db=DB.Clickhouse,
|
52
52
|
db_config=ClickhouseConfig(
|
53
53
|
db_label=parameters["db_label"],
|
54
|
+
user=parameters["user"],
|
54
55
|
password=SecretStr(parameters["password"]) if parameters["password"] else None,
|
55
56
|
host=parameters["host"],
|
56
57
|
port=parameters["port"],
|
@@ -106,7 +106,7 @@ class Clickhouse(VectorDB):
|
|
106
106
|
query = f"""
|
107
107
|
ALTER TABLE {self.db_config["database"]}.{self.table_name}
|
108
108
|
ADD INDEX {self._index_name} {self._vector_field}
|
109
|
-
TYPE vector_similarity('hnsw', '{self.index_param["metric_type"]}',
|
109
|
+
TYPE vector_similarity('hnsw', '{self.index_param["metric_type"]}',{self.dim},
|
110
110
|
'{self.index_param["quantization"]}',
|
111
111
|
{self.index_param["params"]["M"]}, {self.index_param["params"]["efConstruction"]})
|
112
112
|
GRANULARITY {self.index_param["granularity"]}
|
@@ -115,7 +115,7 @@ class Clickhouse(VectorDB):
|
|
115
115
|
query = f"""
|
116
116
|
ALTER TABLE {self.db_config["database"]}.{self.table_name}
|
117
117
|
ADD INDEX {self._index_name} {self._vector_field}
|
118
|
-
TYPE vector_similarity('hnsw', '{self.index_param["metric_type"]}')
|
118
|
+
TYPE vector_similarity('hnsw', '{self.index_param["metric_type"]}', {self.dim})
|
119
119
|
GRANULARITY {self.index_param["granularity"]}
|
120
120
|
"""
|
121
121
|
self.conn.command(cmd=query)
|
@@ -186,7 +186,7 @@ class Clickhouse(VectorDB):
|
|
186
186
|
"vector_field": self._vector_field,
|
187
187
|
"schema": self.db_config["database"],
|
188
188
|
"table": self.table_name,
|
189
|
-
"gt": filters.get("id"),
|
189
|
+
"gt": 0 if filters is None else filters.get("id", 0),
|
190
190
|
"k": k,
|
191
191
|
"metric_type": self.search_param["metric_type"],
|
192
192
|
"query": query,
|
@@ -16,7 +16,7 @@ class ClickhouseConfigDict(TypedDict):
|
|
16
16
|
|
17
17
|
|
18
18
|
class ClickhouseConfig(DBConfig):
|
19
|
-
|
19
|
+
user: str = "clickhouse"
|
20
20
|
password: SecretStr
|
21
21
|
host: str = "localhost"
|
22
22
|
port: int = 8123
|
@@ -29,7 +29,7 @@ class ClickhouseConfig(DBConfig):
|
|
29
29
|
"host": self.host,
|
30
30
|
"port": self.port,
|
31
31
|
"database": self.db_name,
|
32
|
-
"user": self.
|
32
|
+
"user": self.user,
|
33
33
|
"password": pwd_str,
|
34
34
|
"secure": self.secure,
|
35
35
|
}
|
@@ -0,0 +1,92 @@
|
|
1
|
+
from typing import Annotated, Unpack
|
2
|
+
|
3
|
+
import click
|
4
|
+
from pydantic import SecretStr
|
5
|
+
|
6
|
+
from ....cli.cli import (
|
7
|
+
CommonTypedDict,
|
8
|
+
cli,
|
9
|
+
click_parameter_decorators_from_typed_dict,
|
10
|
+
run,
|
11
|
+
)
|
12
|
+
from .. import DB
|
13
|
+
from ..api import IndexType
|
14
|
+
|
15
|
+
|
16
|
+
class LanceDBTypedDict(CommonTypedDict):
|
17
|
+
uri: Annotated[
|
18
|
+
str,
|
19
|
+
click.option("--uri", type=str, help="URI connection string", required=True),
|
20
|
+
]
|
21
|
+
token: Annotated[
|
22
|
+
str | None,
|
23
|
+
click.option("--token", type=str, help="Authentication token", required=False),
|
24
|
+
]
|
25
|
+
|
26
|
+
|
27
|
+
@cli.command()
|
28
|
+
@click_parameter_decorators_from_typed_dict(LanceDBTypedDict)
|
29
|
+
def LanceDB(**parameters: Unpack[LanceDBTypedDict]):
|
30
|
+
from .config import LanceDBConfig, _lancedb_case_config
|
31
|
+
|
32
|
+
run(
|
33
|
+
db=DB.LanceDB,
|
34
|
+
db_config=LanceDBConfig(
|
35
|
+
db_label=parameters["db_label"],
|
36
|
+
uri=parameters["uri"],
|
37
|
+
token=SecretStr(parameters["token"]) if parameters.get("token") else None,
|
38
|
+
),
|
39
|
+
db_case_config=_lancedb_case_config.get("NONE")(),
|
40
|
+
**parameters,
|
41
|
+
)
|
42
|
+
|
43
|
+
|
44
|
+
@cli.command()
|
45
|
+
@click_parameter_decorators_from_typed_dict(LanceDBTypedDict)
|
46
|
+
def LanceDBAutoIndex(**parameters: Unpack[LanceDBTypedDict]):
|
47
|
+
from .config import LanceDBConfig, _lancedb_case_config
|
48
|
+
|
49
|
+
run(
|
50
|
+
db=DB.LanceDB,
|
51
|
+
db_config=LanceDBConfig(
|
52
|
+
db_label=parameters["db_label"],
|
53
|
+
uri=parameters["uri"],
|
54
|
+
token=SecretStr(parameters["token"]) if parameters.get("token") else None,
|
55
|
+
),
|
56
|
+
db_case_config=_lancedb_case_config.get(IndexType.AUTOINDEX)(),
|
57
|
+
**parameters,
|
58
|
+
)
|
59
|
+
|
60
|
+
|
61
|
+
@cli.command()
|
62
|
+
@click_parameter_decorators_from_typed_dict(LanceDBTypedDict)
|
63
|
+
def LanceDBIVFPQ(**parameters: Unpack[LanceDBTypedDict]):
|
64
|
+
from .config import LanceDBConfig, _lancedb_case_config
|
65
|
+
|
66
|
+
run(
|
67
|
+
db=DB.LanceDB,
|
68
|
+
db_config=LanceDBConfig(
|
69
|
+
db_label=parameters["db_label"],
|
70
|
+
uri=parameters["uri"],
|
71
|
+
token=SecretStr(parameters["token"]) if parameters.get("token") else None,
|
72
|
+
),
|
73
|
+
db_case_config=_lancedb_case_config.get(IndexType.IVFPQ)(),
|
74
|
+
**parameters,
|
75
|
+
)
|
76
|
+
|
77
|
+
|
78
|
+
@cli.command()
|
79
|
+
@click_parameter_decorators_from_typed_dict(LanceDBTypedDict)
|
80
|
+
def LanceDBHNSW(**parameters: Unpack[LanceDBTypedDict]):
|
81
|
+
from .config import LanceDBConfig, _lancedb_case_config
|
82
|
+
|
83
|
+
run(
|
84
|
+
db=DB.LanceDB,
|
85
|
+
db_config=LanceDBConfig(
|
86
|
+
db_label=parameters["db_label"],
|
87
|
+
uri=parameters["uri"],
|
88
|
+
token=SecretStr(parameters["token"]) if parameters.get("token") else None,
|
89
|
+
),
|
90
|
+
db_case_config=_lancedb_case_config.get(IndexType.HNSW)(),
|
91
|
+
**parameters,
|
92
|
+
)
|
@@ -0,0 +1,103 @@
|
|
1
|
+
from pydantic import BaseModel, SecretStr
|
2
|
+
|
3
|
+
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
|
4
|
+
|
5
|
+
|
6
|
+
class LanceDBConfig(DBConfig):
|
7
|
+
"""LanceDB connection configuration."""
|
8
|
+
|
9
|
+
db_label: str
|
10
|
+
uri: str
|
11
|
+
token: SecretStr | None = None
|
12
|
+
|
13
|
+
def to_dict(self) -> dict:
|
14
|
+
return {
|
15
|
+
"uri": self.uri,
|
16
|
+
"token": self.token.get_secret_value() if self.token else None,
|
17
|
+
}
|
18
|
+
|
19
|
+
|
20
|
+
class LanceDBIndexConfig(BaseModel, DBCaseConfig):
|
21
|
+
index: IndexType = IndexType.IVFPQ
|
22
|
+
metric_type: MetricType = MetricType.L2
|
23
|
+
num_partitions: int = 0
|
24
|
+
num_sub_vectors: int = 0
|
25
|
+
nbits: int = 8 # Must be 4 or 8
|
26
|
+
sample_rate: int = 256
|
27
|
+
max_iterations: int = 50
|
28
|
+
|
29
|
+
def index_param(self) -> dict:
|
30
|
+
if self.index not in [
|
31
|
+
IndexType.IVFPQ,
|
32
|
+
IndexType.HNSW,
|
33
|
+
IndexType.AUTOINDEX,
|
34
|
+
IndexType.NONE,
|
35
|
+
]:
|
36
|
+
msg = f"Index type {self.index} is not supported for LanceDB!"
|
37
|
+
raise ValueError(msg)
|
38
|
+
|
39
|
+
# See https://lancedb.github.io/lancedb/python/python/#lancedb.table.Table.create_index
|
40
|
+
params = {
|
41
|
+
"metric": self.parse_metric(),
|
42
|
+
"num_bits": self.nbits,
|
43
|
+
"sample_rate": self.sample_rate,
|
44
|
+
"max_iterations": self.max_iterations,
|
45
|
+
}
|
46
|
+
|
47
|
+
if self.num_partitions > 0:
|
48
|
+
params["num_partitions"] = self.num_partitions
|
49
|
+
if self.num_sub_vectors > 0:
|
50
|
+
params["num_sub_vectors"] = self.num_sub_vectors
|
51
|
+
|
52
|
+
return params
|
53
|
+
|
54
|
+
def search_param(self) -> dict:
|
55
|
+
pass
|
56
|
+
|
57
|
+
def parse_metric(self) -> str:
|
58
|
+
if self.metric_type in [MetricType.L2, MetricType.COSINE]:
|
59
|
+
return self.metric_type.value.lower()
|
60
|
+
if self.metric_type in [MetricType.IP, MetricType.DP]:
|
61
|
+
return "dot"
|
62
|
+
msg = f"Metric type {self.metric_type} is not supported for LanceDB!"
|
63
|
+
raise ValueError(msg)
|
64
|
+
|
65
|
+
|
66
|
+
class LanceDBNoIndexConfig(LanceDBIndexConfig):
|
67
|
+
index: IndexType = IndexType.NONE
|
68
|
+
|
69
|
+
def index_param(self) -> dict:
|
70
|
+
return {}
|
71
|
+
|
72
|
+
|
73
|
+
class LanceDBAutoIndexConfig(LanceDBIndexConfig):
|
74
|
+
index: IndexType = IndexType.AUTOINDEX
|
75
|
+
|
76
|
+
def index_param(self) -> dict:
|
77
|
+
return {}
|
78
|
+
|
79
|
+
|
80
|
+
class LanceDBHNSWIndexConfig(LanceDBIndexConfig):
|
81
|
+
index: IndexType = IndexType.HNSW
|
82
|
+
m: int = 0
|
83
|
+
ef_construction: int = 0
|
84
|
+
|
85
|
+
def index_param(self) -> dict:
|
86
|
+
params = LanceDBIndexConfig.index_param(self)
|
87
|
+
|
88
|
+
# See https://lancedb.github.io/lancedb/python/python/#lancedb.index.HnswSq
|
89
|
+
params["index_type"] = "IVF_HNSW_SQ"
|
90
|
+
if self.m > 0:
|
91
|
+
params["m"] = self.m
|
92
|
+
if self.ef_construction > 0:
|
93
|
+
params["ef_construction"] = self.ef_construction
|
94
|
+
|
95
|
+
return params
|
96
|
+
|
97
|
+
|
98
|
+
_lancedb_case_config = {
|
99
|
+
IndexType.IVFPQ: LanceDBIndexConfig,
|
100
|
+
IndexType.AUTOINDEX: LanceDBAutoIndexConfig,
|
101
|
+
IndexType.HNSW: LanceDBHNSWIndexConfig,
|
102
|
+
IndexType.NONE: LanceDBNoIndexConfig,
|
103
|
+
}
|
@@ -0,0 +1,97 @@
|
|
1
|
+
import logging
|
2
|
+
from contextlib import contextmanager
|
3
|
+
|
4
|
+
import lancedb
|
5
|
+
import pyarrow as pa
|
6
|
+
from lancedb.pydantic import LanceModel
|
7
|
+
|
8
|
+
from ..api import IndexType, VectorDB
|
9
|
+
from .config import LanceDBConfig, LanceDBIndexConfig
|
10
|
+
|
11
|
+
log = logging.getLogger(__name__)
|
12
|
+
|
13
|
+
|
14
|
+
class VectorModel(LanceModel):
|
15
|
+
id: int
|
16
|
+
vector: list[float]
|
17
|
+
|
18
|
+
|
19
|
+
class LanceDB(VectorDB):
|
20
|
+
def __init__(
|
21
|
+
self,
|
22
|
+
dim: int,
|
23
|
+
db_config: LanceDBConfig,
|
24
|
+
db_case_config: LanceDBIndexConfig,
|
25
|
+
collection_name: str = "vector_bench_test",
|
26
|
+
drop_old: bool = False,
|
27
|
+
**kwargs,
|
28
|
+
):
|
29
|
+
self.name = "LanceDB"
|
30
|
+
self.db_config = db_config
|
31
|
+
self.case_config = db_case_config
|
32
|
+
self.table_name = collection_name
|
33
|
+
self.dim = dim
|
34
|
+
self.uri = db_config["uri"]
|
35
|
+
|
36
|
+
db = lancedb.connect(self.uri)
|
37
|
+
|
38
|
+
if drop_old:
|
39
|
+
try:
|
40
|
+
db.drop_table(self.table_name)
|
41
|
+
except Exception as e:
|
42
|
+
log.warning(f"Failed to drop table {self.table_name}: {e}")
|
43
|
+
|
44
|
+
try:
|
45
|
+
db.open_table(self.table_name)
|
46
|
+
except Exception:
|
47
|
+
schema = pa.schema(
|
48
|
+
[pa.field("id", pa.int64()), pa.field("vector", pa.list_(pa.float64(), list_size=self.dim))]
|
49
|
+
)
|
50
|
+
db.create_table(self.table_name, schema=schema, mode="overwrite")
|
51
|
+
|
52
|
+
@contextmanager
|
53
|
+
def init(self):
|
54
|
+
self.db = lancedb.connect(self.uri)
|
55
|
+
self.table = self.db.open_table(self.table_name)
|
56
|
+
yield
|
57
|
+
self.db = None
|
58
|
+
self.table = None
|
59
|
+
|
60
|
+
def insert_embeddings(
|
61
|
+
self,
|
62
|
+
embeddings: list[list[float]],
|
63
|
+
metadata: list[int],
|
64
|
+
) -> tuple[int, Exception | None]:
|
65
|
+
try:
|
66
|
+
data = [{"id": meta, "vector": emb} for meta, emb in zip(metadata, embeddings, strict=False)]
|
67
|
+
self.table.add(data)
|
68
|
+
return len(metadata), None
|
69
|
+
except Exception as e:
|
70
|
+
log.warning(f"Failed to insert data into LanceDB table ({self.table_name}), error: {e}")
|
71
|
+
return 0, e
|
72
|
+
|
73
|
+
def search_embedding(
|
74
|
+
self,
|
75
|
+
query: list[float],
|
76
|
+
k: int = 100,
|
77
|
+
filters: dict | None = None,
|
78
|
+
) -> list[int]:
|
79
|
+
if filters:
|
80
|
+
results = (
|
81
|
+
self.table.search(query)
|
82
|
+
.select(["id"])
|
83
|
+
.where(f"id >= {filters['id']}", prefilter=True)
|
84
|
+
.limit(k)
|
85
|
+
.to_list()
|
86
|
+
)
|
87
|
+
else:
|
88
|
+
results = self.table.search(query).select(["id"]).limit(k).to_list()
|
89
|
+
return [int(result["id"]) for result in results]
|
90
|
+
|
91
|
+
def optimize(self, data_size: int | None = None):
|
92
|
+
if self.table and hasattr(self, "case_config") and self.case_config.index != IndexType.NONE:
|
93
|
+
log.info(f"Creating index for LanceDB table ({self.table_name})")
|
94
|
+
self.table.create_index(**self.case_config.index_param())
|
95
|
+
# Better recall with IVF_PQ (though still bad) but breaks HNSW: https://github.com/lancedb/lancedb/issues/2369
|
96
|
+
if self.case_config.index in (IndexType.IVFPQ, IndexType.AUTOINDEX):
|
97
|
+
self.table.optimize()
|
@@ -0,0 +1,43 @@
|
|
1
|
+
from typing import Annotated, Unpack
|
2
|
+
|
3
|
+
import click
|
4
|
+
from pydantic import SecretStr
|
5
|
+
|
6
|
+
from ....cli.cli import (
|
7
|
+
CommonTypedDict,
|
8
|
+
cli,
|
9
|
+
click_parameter_decorators_from_typed_dict,
|
10
|
+
run,
|
11
|
+
)
|
12
|
+
from .. import DB
|
13
|
+
|
14
|
+
|
15
|
+
class QdrantTypedDict(CommonTypedDict):
|
16
|
+
url: Annotated[
|
17
|
+
str,
|
18
|
+
click.option("--url", type=str, help="URL connection string", required=True),
|
19
|
+
]
|
20
|
+
api_key: Annotated[
|
21
|
+
str | None,
|
22
|
+
click.option("--api-key", type=str, help="API key for authentication", required=False),
|
23
|
+
]
|
24
|
+
|
25
|
+
|
26
|
+
@cli.command()
|
27
|
+
@click_parameter_decorators_from_typed_dict(QdrantTypedDict)
|
28
|
+
def QdrantCloud(**parameters: Unpack[QdrantTypedDict]):
|
29
|
+
from .config import QdrantConfig, QdrantIndexConfig
|
30
|
+
|
31
|
+
config_params = {
|
32
|
+
"db_label": parameters["db_label"],
|
33
|
+
"url": SecretStr(parameters["url"]),
|
34
|
+
}
|
35
|
+
|
36
|
+
config_params["api_key"] = SecretStr(parameters["api_key"]) if parameters["api_key"] else None
|
37
|
+
|
38
|
+
run(
|
39
|
+
db=DB.QdrantCloud,
|
40
|
+
db_config=QdrantConfig(**config_params),
|
41
|
+
db_case_config=QdrantIndexConfig(),
|
42
|
+
**parameters,
|
43
|
+
)
|
@@ -6,14 +6,14 @@ from ..api import DBCaseConfig, DBConfig, MetricType
|
|
6
6
|
# Allowing `api_key` to be left empty, to ensure compatibility with the open-source Qdrant.
|
7
7
|
class QdrantConfig(DBConfig):
|
8
8
|
url: SecretStr
|
9
|
-
api_key: SecretStr
|
9
|
+
api_key: SecretStr | None = None
|
10
10
|
|
11
11
|
def to_dict(self) -> dict:
|
12
|
-
|
13
|
-
if
|
12
|
+
api_key_value = self.api_key.get_secret_value() if self.api_key else None
|
13
|
+
if api_key_value:
|
14
14
|
return {
|
15
15
|
"url": self.url.get_secret_value(),
|
16
|
-
"api_key":
|
16
|
+
"api_key": api_key_value,
|
17
17
|
"prefer_grpc": True,
|
18
18
|
}
|
19
19
|
return {
|
@@ -5,10 +5,12 @@ import random
|
|
5
5
|
import time
|
6
6
|
import traceback
|
7
7
|
from collections.abc import Iterable
|
8
|
+
from multiprocessing.queues import Queue
|
8
9
|
|
9
10
|
import numpy as np
|
10
11
|
|
11
12
|
from ... import config
|
13
|
+
from ...models import ConcurrencySlotTimeoutError
|
12
14
|
from ..clients import api
|
13
15
|
|
14
16
|
NUM_PER_BATCH = config.NUM_PER_BATCH
|
@@ -28,16 +30,18 @@ class MultiProcessingSearchRunner:
|
|
28
30
|
self,
|
29
31
|
db: api.VectorDB,
|
30
32
|
test_data: list[list[float]],
|
31
|
-
k: int =
|
33
|
+
k: int = config.K_DEFAULT,
|
32
34
|
filters: dict | None = None,
|
33
35
|
concurrencies: Iterable[int] = config.NUM_CONCURRENCY,
|
34
|
-
duration: int =
|
36
|
+
duration: int = config.CONCURRENCY_DURATION,
|
37
|
+
concurrency_timeout: int = config.CONCURRENCY_TIMEOUT,
|
35
38
|
):
|
36
39
|
self.db = db
|
37
40
|
self.k = k
|
38
41
|
self.filters = filters
|
39
42
|
self.concurrencies = concurrencies
|
40
43
|
self.duration = duration
|
44
|
+
self.concurrency_timeout = concurrency_timeout
|
41
45
|
|
42
46
|
self.test_data = test_data
|
43
47
|
log.debug(f"test dataset columns: {len(test_data)}")
|
@@ -114,9 +118,7 @@ class MultiProcessingSearchRunner:
|
|
114
118
|
log.info(f"Start search {self.duration}s in concurrency {conc}, filters: {self.filters}")
|
115
119
|
future_iter = [executor.submit(self.search, self.test_data, q, cond) for i in range(conc)]
|
116
120
|
# Sync all processes
|
117
|
-
|
118
|
-
sleep_t = conc if conc < 10 else 10
|
119
|
-
time.sleep(sleep_t)
|
121
|
+
self._wait_for_queue_fill(q, size=conc)
|
120
122
|
|
121
123
|
with cond:
|
122
124
|
cond.notify_all()
|
@@ -160,6 +162,15 @@ class MultiProcessingSearchRunner:
|
|
160
162
|
conc_latency_avg_list,
|
161
163
|
)
|
162
164
|
|
165
|
+
def _wait_for_queue_fill(self, q: Queue, size: int):
|
166
|
+
wait_t = 0
|
167
|
+
while q.qsize() < size:
|
168
|
+
sleep_t = size if size < 10 else 10
|
169
|
+
wait_t += sleep_t
|
170
|
+
if wait_t > self.concurrency_timeout > 0:
|
171
|
+
raise ConcurrencySlotTimeoutError
|
172
|
+
time.sleep(sleep_t)
|
173
|
+
|
163
174
|
def run(self) -> float:
|
164
175
|
"""
|
165
176
|
Returns:
|
@@ -275,6 +275,7 @@ class CaseRunner(BaseModel):
|
|
275
275
|
filters=self.ca.filters,
|
276
276
|
concurrencies=self.config.case_config.concurrency_search_config.num_concurrency,
|
277
277
|
duration=self.config.case_config.concurrency_search_config.concurrency_duration,
|
278
|
+
concurrency_timeout=self.config.case_config.concurrency_search_config.concurrency_timeout,
|
278
279
|
k=self.config.case_config.k,
|
279
280
|
)
|
280
281
|
|
vectordb_bench/cli/cli.py
CHANGED
@@ -17,10 +17,9 @@ from typing import (
|
|
17
17
|
import click
|
18
18
|
from yaml import load
|
19
19
|
|
20
|
-
from vectordb_bench.backend.clients.api import MetricType
|
21
|
-
|
22
20
|
from .. import config
|
23
21
|
from ..backend.clients import DB
|
22
|
+
from ..backend.clients.api import MetricType
|
24
23
|
from ..interface import benchmark_runner, global_result_future
|
25
24
|
from ..models import (
|
26
25
|
CaseConfig,
|
@@ -303,6 +302,17 @@ class CommonTypedDict(TypedDict):
|
|
303
302
|
callback=lambda *args: list(map(int, click_arg_split(*args))),
|
304
303
|
),
|
305
304
|
]
|
305
|
+
concurrency_timeout: Annotated[
|
306
|
+
int,
|
307
|
+
click.option(
|
308
|
+
"--concurrency-timeout",
|
309
|
+
type=int,
|
310
|
+
default=config.CONCURRENCY_TIMEOUT,
|
311
|
+
show_default=True,
|
312
|
+
help="Timeout (in seconds) to wait for a concurrency slot before failing. "
|
313
|
+
"Set to a negative value to wait indefinitely.",
|
314
|
+
),
|
315
|
+
]
|
306
316
|
custom_case_name: Annotated[
|
307
317
|
str,
|
308
318
|
click.option(
|
@@ -405,6 +415,7 @@ class CommonTypedDict(TypedDict):
|
|
405
415
|
show_default=True,
|
406
416
|
),
|
407
417
|
]
|
418
|
+
task_label: Annotated[str, click.option("--task-label", help="Task label")]
|
408
419
|
|
409
420
|
|
410
421
|
class HNSWBaseTypedDict(TypedDict):
|
@@ -489,6 +500,7 @@ def run(
|
|
489
500
|
concurrency_search_config=ConcurrencySearchConfig(
|
490
501
|
concurrency_duration=parameters["concurrency_duration"],
|
491
502
|
num_concurrency=[int(s) for s in parameters["num_concurrency"]],
|
503
|
+
concurrency_timeout=parameters["concurrency_timeout"],
|
492
504
|
),
|
493
505
|
custom_case=get_custom_case_config(parameters),
|
494
506
|
),
|
@@ -499,10 +511,11 @@ def run(
|
|
499
511
|
parameters["search_concurrent"],
|
500
512
|
),
|
501
513
|
)
|
514
|
+
task_label = parameters["task_label"]
|
502
515
|
|
503
516
|
log.info(f"Task:\n{pformat(task)}\n")
|
504
517
|
if not parameters["dry_run"]:
|
505
|
-
benchmark_runner.run([task])
|
518
|
+
benchmark_runner.run([task], task_label)
|
506
519
|
time.sleep(5)
|
507
520
|
if global_result_future:
|
508
521
|
wait([global_result_future])
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from ..backend.clients.alloydb.cli import AlloyDBScaNN
|
2
2
|
from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
|
3
3
|
from ..backend.clients.clickhouse.cli import Clickhouse
|
4
|
+
from ..backend.clients.lancedb.cli import LanceDB
|
4
5
|
from ..backend.clients.mariadb.cli import MariaDBHNSW
|
5
6
|
from ..backend.clients.memorydb.cli import MemoryDB
|
6
7
|
from ..backend.clients.milvus.cli import MilvusAutoIndex
|
@@ -8,6 +9,7 @@ from ..backend.clients.pgdiskann.cli import PgDiskAnn
|
|
8
9
|
from ..backend.clients.pgvecto_rs.cli import PgVectoRSHNSW, PgVectoRSIVFFlat
|
9
10
|
from ..backend.clients.pgvector.cli import PgVectorHNSW
|
10
11
|
from ..backend.clients.pgvectorscale.cli import PgVectorScaleDiskAnn
|
12
|
+
from ..backend.clients.qdrant_cloud.cli import QdrantCloud
|
11
13
|
from ..backend.clients.redis.cli import Redis
|
12
14
|
from ..backend.clients.test.cli import Test
|
13
15
|
from ..backend.clients.tidb.cli import TiDB
|
@@ -33,6 +35,8 @@ cli.add_command(MariaDBHNSW)
|
|
33
35
|
cli.add_command(TiDB)
|
34
36
|
cli.add_command(Clickhouse)
|
35
37
|
cli.add_command(Vespa)
|
38
|
+
cli.add_command(LanceDB)
|
39
|
+
cli.add_command(QdrantCloud)
|
36
40
|
|
37
41
|
|
38
42
|
if __name__ == "__main__":
|
@@ -36,21 +36,27 @@ def dbConfigSettingItem(st, activeDb: DB):
|
|
36
36
|
columns = st.columns(DB_CONFIG_SETTING_COLUMNS)
|
37
37
|
|
38
38
|
dbConfigClass = activeDb.config_cls
|
39
|
-
|
39
|
+
schema = dbConfigClass.schema()
|
40
|
+
property_items = schema.get("properties").items()
|
41
|
+
required_fields = set(schema.get("required", []))
|
40
42
|
dbConfig = {}
|
41
43
|
idx = 0
|
42
44
|
|
43
45
|
# db config (unique)
|
44
|
-
for key, property in
|
46
|
+
for key, property in property_items:
|
45
47
|
if key not in dbConfigClass.common_short_configs() and key not in dbConfigClass.common_long_configs():
|
46
48
|
column = columns[idx % DB_CONFIG_SETTING_COLUMNS]
|
47
49
|
idx += 1
|
48
|
-
|
50
|
+
input_value = column.text_input(
|
49
51
|
key,
|
50
|
-
key="
|
52
|
+
key=f"{activeDb.name}-{key}",
|
51
53
|
value=property.get("default", ""),
|
52
54
|
type="password" if inputIsPassword(key) else "default",
|
55
|
+
placeholder="optional" if key not in required_fields else None,
|
53
56
|
)
|
57
|
+
if key in required_fields or input_value:
|
58
|
+
dbConfig[key] = input_value
|
59
|
+
|
54
60
|
# db config (common short labels)
|
55
61
|
for key in dbConfigClass.common_short_configs():
|
56
62
|
column = columns[idx % DB_CONFIG_SETTING_COLUMNS]
|
@@ -1491,6 +1491,127 @@ VespaLoadingConfig = [
|
|
1491
1491
|
]
|
1492
1492
|
VespaPerformanceConfig = VespaLoadingConfig
|
1493
1493
|
|
1494
|
+
CaseConfigParamInput_IndexType_LanceDB = CaseConfigInput(
|
1495
|
+
label=CaseConfigParamType.IndexType,
|
1496
|
+
inputHelp="AUTOINDEX = IVFPQ with default parameters",
|
1497
|
+
inputType=InputType.Option,
|
1498
|
+
inputConfig={
|
1499
|
+
"options": [
|
1500
|
+
IndexType.NONE.value,
|
1501
|
+
IndexType.AUTOINDEX.value,
|
1502
|
+
IndexType.IVFPQ.value,
|
1503
|
+
IndexType.HNSW.value,
|
1504
|
+
],
|
1505
|
+
},
|
1506
|
+
)
|
1507
|
+
|
1508
|
+
CaseConfigParamInput_num_partitions_LanceDB = CaseConfigInput(
|
1509
|
+
label=CaseConfigParamType.num_partitions,
|
1510
|
+
displayLabel="Number of Partitions",
|
1511
|
+
inputHelp="Number of partitions (clusters) for IVF_PQ. Default (when 0): sqrt(num_rows)",
|
1512
|
+
inputType=InputType.Number,
|
1513
|
+
inputConfig={
|
1514
|
+
"min": 0,
|
1515
|
+
"max": 10000,
|
1516
|
+
"value": 0,
|
1517
|
+
},
|
1518
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.IVFPQ.value
|
1519
|
+
or config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
|
1520
|
+
)
|
1521
|
+
|
1522
|
+
CaseConfigParamInput_num_sub_vectors_LanceDB = CaseConfigInput(
|
1523
|
+
label=CaseConfigParamType.num_sub_vectors,
|
1524
|
+
displayLabel="Number of Sub-vectors",
|
1525
|
+
inputHelp="Number of sub-vectors for PQ. Default (when 0): dim/16 or dim/8",
|
1526
|
+
inputType=InputType.Number,
|
1527
|
+
inputConfig={
|
1528
|
+
"min": 0,
|
1529
|
+
"max": 1000,
|
1530
|
+
"value": 0,
|
1531
|
+
},
|
1532
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.IVFPQ.value
|
1533
|
+
or config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
|
1534
|
+
)
|
1535
|
+
|
1536
|
+
CaseConfigParamInput_num_bits_LanceDB = CaseConfigInput(
|
1537
|
+
label=CaseConfigParamType.nbits,
|
1538
|
+
displayLabel="Number of Bits",
|
1539
|
+
inputHelp="Number of bits per sub-vector.",
|
1540
|
+
inputType=InputType.Option,
|
1541
|
+
inputConfig={
|
1542
|
+
"options": [4, 8],
|
1543
|
+
},
|
1544
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.IVFPQ.value
|
1545
|
+
or config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
|
1546
|
+
)
|
1547
|
+
|
1548
|
+
CaseConfigParamInput_sample_rate_LanceDB = CaseConfigInput(
|
1549
|
+
label=CaseConfigParamType.sample_rate,
|
1550
|
+
displayLabel="Sample Rate",
|
1551
|
+
inputHelp="Sample rate for training. Higher values are more accurate but slower",
|
1552
|
+
inputType=InputType.Number,
|
1553
|
+
inputConfig={
|
1554
|
+
"min": 16,
|
1555
|
+
"max": 1024,
|
1556
|
+
"value": 256,
|
1557
|
+
},
|
1558
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.IVFPQ.value
|
1559
|
+
or config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
|
1560
|
+
)
|
1561
|
+
|
1562
|
+
CaseConfigParamInput_max_iterations_LanceDB = CaseConfigInput(
|
1563
|
+
label=CaseConfigParamType.max_iterations,
|
1564
|
+
displayLabel="Max Iterations",
|
1565
|
+
inputHelp="Maximum iterations for k-means clustering",
|
1566
|
+
inputType=InputType.Number,
|
1567
|
+
inputConfig={
|
1568
|
+
"min": 10,
|
1569
|
+
"max": 200,
|
1570
|
+
"value": 50,
|
1571
|
+
},
|
1572
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.IVFPQ.value
|
1573
|
+
or config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
|
1574
|
+
)
|
1575
|
+
|
1576
|
+
CaseConfigParamInput_m_LanceDB = CaseConfigInput(
|
1577
|
+
label=CaseConfigParamType.m,
|
1578
|
+
displayLabel="m",
|
1579
|
+
inputHelp="m parameter in HNSW",
|
1580
|
+
inputType=InputType.Number,
|
1581
|
+
inputConfig={
|
1582
|
+
"min": 0,
|
1583
|
+
"max": 1000,
|
1584
|
+
"value": 0,
|
1585
|
+
},
|
1586
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
|
1587
|
+
)
|
1588
|
+
|
1589
|
+
CaseConfigParamInput_ef_construction_LanceDB = CaseConfigInput(
|
1590
|
+
label=CaseConfigParamType.ef_construction,
|
1591
|
+
displayLabel="ef_construction",
|
1592
|
+
inputHelp="ef_construction parameter in HNSW",
|
1593
|
+
inputType=InputType.Number,
|
1594
|
+
inputConfig={
|
1595
|
+
"min": 0,
|
1596
|
+
"max": 1000,
|
1597
|
+
"value": 0,
|
1598
|
+
},
|
1599
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
|
1600
|
+
)
|
1601
|
+
|
1602
|
+
LanceDBLoadConfig = [
|
1603
|
+
CaseConfigParamInput_IndexType_LanceDB,
|
1604
|
+
CaseConfigParamInput_num_partitions_LanceDB,
|
1605
|
+
CaseConfigParamInput_num_sub_vectors_LanceDB,
|
1606
|
+
CaseConfigParamInput_num_bits_LanceDB,
|
1607
|
+
CaseConfigParamInput_sample_rate_LanceDB,
|
1608
|
+
CaseConfigParamInput_max_iterations_LanceDB,
|
1609
|
+
CaseConfigParamInput_m_LanceDB,
|
1610
|
+
CaseConfigParamInput_ef_construction_LanceDB,
|
1611
|
+
]
|
1612
|
+
|
1613
|
+
LanceDBPerformanceConfig = LanceDBLoadConfig
|
1614
|
+
|
1494
1615
|
CASE_CONFIG_MAP = {
|
1495
1616
|
DB.Milvus: {
|
1496
1617
|
CaseLabel.Load: MilvusLoadConfig,
|
@@ -1551,4 +1672,8 @@ CASE_CONFIG_MAP = {
|
|
1551
1672
|
CaseLabel.Load: VespaLoadingConfig,
|
1552
1673
|
CaseLabel.Performance: VespaPerformanceConfig,
|
1553
1674
|
},
|
1675
|
+
DB.LanceDB: {
|
1676
|
+
CaseLabel.Load: LanceDBLoadConfig,
|
1677
|
+
CaseLabel.Performance: LanceDBPerformanceConfig,
|
1678
|
+
},
|
1554
1679
|
}
|
@@ -49,6 +49,7 @@ DB_TO_ICON = {
|
|
49
49
|
DB.AWSOpenSearch: "https://assets.zilliz.com/opensearch_1eee37584e.jpeg",
|
50
50
|
DB.TiDB: "https://img2.pingcap.com/forms/3/d/3d7fd5f9767323d6f037795704211ac44b4923d6.png",
|
51
51
|
DB.Vespa: "https://vespa.ai/vespa-content/uploads/2025/01/Vespa-symbol-green-rgb.png.webp",
|
52
|
+
DB.LanceDB: "https://raw.githubusercontent.com/lancedb/lancedb/main/docs/src/assets/logo.png",
|
52
53
|
}
|
53
54
|
|
54
55
|
# RedisCloud color: #0D6EFD
|
vectordb_bench/models.py
CHANGED
@@ -12,6 +12,7 @@ from .backend.clients import (
|
|
12
12
|
DB,
|
13
13
|
DBCaseConfig,
|
14
14
|
DBConfig,
|
15
|
+
EmptyDBCaseConfig,
|
15
16
|
)
|
16
17
|
from .base import BaseModel
|
17
18
|
from .metric import Metric
|
@@ -29,6 +30,11 @@ class PerformanceTimeoutError(TimeoutError):
|
|
29
30
|
super().__init__("Performance case optimize timeout")
|
30
31
|
|
31
32
|
|
33
|
+
class ConcurrencySlotTimeoutError(TimeoutError):
|
34
|
+
def __init__(self):
|
35
|
+
super().__init__("Timeout while waiting for a concurrency slot to become available")
|
36
|
+
|
37
|
+
|
32
38
|
class CaseConfigParamType(Enum):
|
33
39
|
"""
|
34
40
|
Value will be the key of CaseConfig.params and displayed in UI
|
@@ -96,6 +102,9 @@ class CaseConfigParamType(Enum):
|
|
96
102
|
maxNumPrefetchDatasets = "max_num_prefetch_datasets"
|
97
103
|
storage_engine = "storage_engine"
|
98
104
|
max_cache_size = "max_cache_size"
|
105
|
+
num_partitions = "num_partitions"
|
106
|
+
num_sub_vectors = "num_sub_vectors"
|
107
|
+
sample_rate = "sample_rate"
|
99
108
|
|
100
109
|
# mongodb params
|
101
110
|
mongodb_quantization_type = "quantization"
|
@@ -109,6 +118,7 @@ class CustomizedCase(BaseModel):
|
|
109
118
|
class ConcurrencySearchConfig(BaseModel):
|
110
119
|
num_concurrency: list[int] = config.NUM_CONCURRENCY
|
111
120
|
concurrency_duration: int = config.CONCURRENCY_DURATION
|
121
|
+
concurrency_timeout: int = config.CONCURRENCY_TIMEOUT
|
112
122
|
|
113
123
|
|
114
124
|
class CaseConfig(BaseModel):
|
@@ -247,13 +257,19 @@ class TestResult(BaseModel):
|
|
247
257
|
test_result["task_label"] = test_result["run_id"]
|
248
258
|
|
249
259
|
for case_result in test_result["results"]:
|
250
|
-
task_config = case_result.get("task_config")
|
251
|
-
db = DB(task_config.get("db"))
|
260
|
+
task_config = case_result["task_config"]
|
261
|
+
db = DB(task_config["db"])
|
252
262
|
|
253
263
|
task_config["db_config"] = db.config_cls(**task_config["db_config"])
|
254
|
-
|
255
|
-
|
256
|
-
|
264
|
+
|
265
|
+
# Safely instantiate DBCaseConfig (fallback to EmptyDBCaseConfig on None)
|
266
|
+
raw_case_cfg = task_config.get("db_case_config") or {}
|
267
|
+
index_value = raw_case_cfg.get("index", None)
|
268
|
+
try:
|
269
|
+
task_config["db_case_config"] = db.case_config_cls(index_type=index_value)(**raw_case_cfg)
|
270
|
+
except Exception:
|
271
|
+
log.exception(f"Couldn't get class for index '{index_value}' ({full_path})")
|
272
|
+
task_config["db_case_config"] = EmptyDBCaseConfig(**raw_case_cfg)
|
257
273
|
|
258
274
|
case_result["task_config"] = task_config
|
259
275
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.27
|
3
|
+
Version: 0.0.29
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -52,6 +52,7 @@ Requires-Dist: mariadb; extra == "all"
|
|
52
52
|
Requires-Dist: PyMySQL; extra == "all"
|
53
53
|
Requires-Dist: clickhouse-connect; extra == "all"
|
54
54
|
Requires-Dist: pyvespa; extra == "all"
|
55
|
+
Requires-Dist: lancedb; extra == "all"
|
55
56
|
Provides-Extra: qdrant
|
56
57
|
Requires-Dist: qdrant-client; extra == "qdrant"
|
57
58
|
Provides-Extra: pinecone
|
@@ -87,15 +88,17 @@ Provides-Extra: clickhouse
|
|
87
88
|
Requires-Dist: clickhouse-connect; extra == "clickhouse"
|
88
89
|
Provides-Extra: vespa
|
89
90
|
Requires-Dist: pyvespa; extra == "vespa"
|
91
|
+
Provides-Extra: lancedb
|
92
|
+
Requires-Dist: lancedb; extra == "lancedb"
|
90
93
|
Dynamic: license-file
|
91
94
|
|
92
|
-
# VectorDBBench: A Benchmark Tool for VectorDB
|
95
|
+
# VectorDBBench(VDBBench): A Benchmark Tool for VectorDB
|
93
96
|
|
94
97
|
[](https://pypi.org/project/vectordb-bench/)
|
95
98
|
[](https://pepy.tech/project/vectordb-bench)
|
96
99
|
|
97
100
|
## What is VectorDBBench
|
98
|
-
VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
101
|
+
VectorDBBench(VDBBench) is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
99
102
|
|
100
103
|
Understanding the importance of user experience, we provide an intuitive visual interface. This not only empowers users to initiate benchmarks at ease, but also to view comparative result reports, thereby reproducing benchmark results effortlessly.
|
101
104
|
To add more relevance and practicality, we provide cost-effectiveness reports particularly for cloud services. This allows for a more realistic and applicable benchmarking process.
|
@@ -205,6 +208,10 @@ Options:
|
|
205
208
|
--num-concurrency TEXT Comma-separated list of concurrency values
|
206
209
|
to test during concurrent search [default:
|
207
210
|
1,10,20]
|
211
|
+
--concurrency-timeout INTEGER Timeout (in seconds) to wait for a
|
212
|
+
concurrency slot before failing. Set to a
|
213
|
+
negative value to wait indefinitely.
|
214
|
+
[default: 3600]
|
208
215
|
--user-name TEXT Db username [required]
|
209
216
|
--password TEXT Db password [required]
|
210
217
|
--host TEXT Db host [required]
|
@@ -291,7 +298,8 @@ Options:
|
|
291
298
|
# Memory Management
|
292
299
|
--cb-threshold TEXT k-NN Memory circuit breaker threshold
|
293
300
|
|
294
|
-
--help Show this message and exit
|
301
|
+
--help Show this message and exit.
|
302
|
+
```
|
295
303
|
|
296
304
|
#### Using a configuration file.
|
297
305
|
|
@@ -358,13 +366,13 @@ pip install -e '.[pinecone]'
|
|
358
366
|
```
|
359
367
|
### Run test server
|
360
368
|
```
|
361
|
-
|
369
|
+
python -m vectordb_bench
|
362
370
|
```
|
363
371
|
|
364
372
|
OR:
|
365
373
|
|
366
374
|
```shell
|
367
|
-
|
375
|
+
init_bench
|
368
376
|
```
|
369
377
|
|
370
378
|
OR:
|
@@ -381,13 +389,13 @@ After reopen the repository in container, run `python -m vectordb_bench` in the
|
|
381
389
|
|
382
390
|
### Check coding styles
|
383
391
|
```shell
|
384
|
-
|
392
|
+
make lint
|
385
393
|
```
|
386
394
|
|
387
395
|
To fix the coding styles automatically
|
388
396
|
|
389
397
|
```shell
|
390
|
-
|
398
|
+
make format
|
391
399
|
```
|
392
400
|
|
393
401
|
## How does it work?
|
@@ -1,20 +1,20 @@
|
|
1
|
-
vectordb_bench/__init__.py,sha256=
|
1
|
+
vectordb_bench/__init__.py,sha256=PBGSIdgzof6UMeWbgjFUjTRgUcbu0Tg5njbGo0oU88g,2420
|
2
2
|
vectordb_bench/__main__.py,sha256=cyYbVSU-zA1AgzneGKcRRuzR4ftRDr9sIi9Ei9NZnhI,858
|
3
3
|
vectordb_bench/base.py,sha256=AgavIF0P9ku_RmCRk1KKziba-wI4ZpA2aJvjJzNhRSs,129
|
4
4
|
vectordb_bench/interface.py,sha256=XaCjTgUeI17uVjsgOauPeVlkvnkuCyQOWyOaWhrgCt8,9811
|
5
5
|
vectordb_bench/log_util.py,sha256=wDNaU_JBBOfKi_Z4vq7LDa0kOlLjoNNzDX3VZQn_Dxo,3239
|
6
6
|
vectordb_bench/metric.py,sha256=pj-AxQHyIRHTaJY-wTIkTbC6TqEqMzt3kcEmMWEv71w,2063
|
7
|
-
vectordb_bench/models.py,sha256=
|
7
|
+
vectordb_bench/models.py,sha256=b-DaUopaf6qwuuEbl9wAHKZjuosmOi6gpebYz6iWvBU,12221
|
8
8
|
vectordb_bench/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
9
|
vectordb_bench/backend/assembler.py,sha256=6GInRT7yBgfTaIPmo-XMkYX4pA8PJQmjMQInynwaunE,2047
|
10
10
|
vectordb_bench/backend/cases.py,sha256=obDdY6g3p9Z2fog7qDwLLDuRMwo3LGQKMHsP66QZd2M,16296
|
11
11
|
vectordb_bench/backend/data_source.py,sha256=bfa_Zg4O9fRP2ENmVZ_2-NISKozoFN-TocyxOlw1JtE,5524
|
12
12
|
vectordb_bench/backend/dataset.py,sha256=lH2Q01AEJxA-sYfZHzH2BM019mwuy9mB_i0VLhIgDJ8,9020
|
13
13
|
vectordb_bench/backend/result_collector.py,sha256=mpROVdZ-HChKBVyMV5TZ5v7YGRb69bvfT7Gezn5F5sY,819
|
14
|
-
vectordb_bench/backend/task_runner.py,sha256=
|
14
|
+
vectordb_bench/backend/task_runner.py,sha256=HYZ5B9-qOKAKmrsk-nwVhmXEddf451o4P3xQuSiCTt8,11595
|
15
15
|
vectordb_bench/backend/utils.py,sha256=R6THuJdZhiQYSSJTqv0Uegl2B20taV_QjwvFrun2yxE,1949
|
16
|
-
vectordb_bench/backend/clients/__init__.py,sha256=
|
17
|
-
vectordb_bench/backend/clients/api.py,sha256=
|
16
|
+
vectordb_bench/backend/clients/__init__.py,sha256=4P4Y7qOIYBJqJENsfMNzD5L0C651ypcPr05M1-ph0LU,10549
|
17
|
+
vectordb_bench/backend/clients/api.py,sha256=3AfO-EPNzosaIBfYX3U9HeOMO7Uw0muOZ0x4cqqSH34,6534
|
18
18
|
vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py,sha256=7yPYaWoHeHNxDMtpReGXsdEPFD1e4vQblFor7TmLq5o,770
|
19
19
|
vectordb_bench/backend/clients/aliyun_elasticsearch/config.py,sha256=d9RCgfCgauKvy6z9ig_wBormgwiGtkh8POyoHloHnJA,505
|
20
20
|
vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py,sha256=rwa4rtbbP2Kaczh7Bf0bc_lE_sGG5w9PhtfdFu7rQNs,13237
|
@@ -28,11 +28,14 @@ vectordb_bench/backend/clients/aws_opensearch/config.py,sha256=9meXQUOVFlk3UOAhv
|
|
28
28
|
vectordb_bench/backend/clients/aws_opensearch/run.py,sha256=Ry5aAlielWjq0hx7LnbdShfOwzZhz3Gq9WYu5U43x9s,5001
|
29
29
|
vectordb_bench/backend/clients/chroma/chroma.py,sha256=Aqo6AlSWd0TG0SR4cr9AEoLzXtOJ5VNhbIucHnm8NxY,3619
|
30
30
|
vectordb_bench/backend/clients/chroma/config.py,sha256=8nXpPdecQ5HrNqcsQwAVgacSz6uLgI-BI7v4tB8CeDk,347
|
31
|
-
vectordb_bench/backend/clients/clickhouse/cli.py,sha256=
|
32
|
-
vectordb_bench/backend/clients/clickhouse/clickhouse.py,sha256=
|
33
|
-
vectordb_bench/backend/clients/clickhouse/config.py,sha256
|
31
|
+
vectordb_bench/backend/clients/clickhouse/cli.py,sha256=6I0AwUOrqfjQbN_3aSTJHUYE-PAAMAQ4AIZC_8GqoEw,2054
|
32
|
+
vectordb_bench/backend/clients/clickhouse/clickhouse.py,sha256=1i-64mzluloJ3fXT7J3_HXzkUtJ4re7HwuRwiqtGOck,8956
|
33
|
+
vectordb_bench/backend/clients/clickhouse/config.py,sha256=-waHUHrT9WwuSNjHYE7T5j8s8RTsHNTDFuzmqT4nQWI,2649
|
34
34
|
vectordb_bench/backend/clients/elastic_cloud/config.py,sha256=_5Cz3__CbMU7zCizkhK1pGhH3TLJacn8efVueUZ0lnQ,1573
|
35
35
|
vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py,sha256=FSslLDH2Yi9ZdUwaCbKC_IXxFbMvW-L1xB3YMU08MVI,5448
|
36
|
+
vectordb_bench/backend/clients/lancedb/cli.py,sha256=j5eqb-_CSWF1rdxAj2Byut6albHEj0JF51wCruaJsu8,2688
|
37
|
+
vectordb_bench/backend/clients/lancedb/config.py,sha256=Udd39FrYmIa9ZztmfAC0BLj0rBaPv3yd9XaF5VkCziU,2950
|
38
|
+
vectordb_bench/backend/clients/lancedb/lancedb.py,sha256=9hFHtj_Ry44nVY1vX9FSnB_WAL6ih86Rx2qFiZgEkX0,3148
|
36
39
|
vectordb_bench/backend/clients/mariadb/cli.py,sha256=nqV9V-gOSKGQ1y6VmxOMxGz0a3jz860Va55x7JBcuPk,2727
|
37
40
|
vectordb_bench/backend/clients/mariadb/config.py,sha256=DNxo0i1c0wIfii78Luv9GeOFq-74yvkkg3Np9sNUyFI,1870
|
38
41
|
vectordb_bench/backend/clients/mariadb/mariadb.py,sha256=O2PY7pP3dYdp-aTOQLDVckdNabCZscw5Xup7Z8LnWIg,7137
|
@@ -58,7 +61,8 @@ vectordb_bench/backend/clients/pgvectorscale/config.py,sha256=ZMcRQPyCMzMJLXw56z
|
|
58
61
|
vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py,sha256=NONFdcE-b-mt6GsRTru6UbMMu8iqX8PfRF43fY_AODw,10136
|
59
62
|
vectordb_bench/backend/clients/pinecone/config.py,sha256=hzPX1lxDpYI9IdpNs7RYB1vAn2uMlCw9NH4FonQEmfQ,294
|
60
63
|
vectordb_bench/backend/clients/pinecone/pinecone.py,sha256=SeJ-XnuIZxFDYhgO8FlRNYN65lPXDW2HEQuu5s5Na5Q,3591
|
61
|
-
vectordb_bench/backend/clients/qdrant_cloud/
|
64
|
+
vectordb_bench/backend/clients/qdrant_cloud/cli.py,sha256=QoJ8t76mJmXrj-VJYn6-Atc1EryFhAApvtWUxei0wuo,1095
|
65
|
+
vectordb_bench/backend/clients/qdrant_cloud/config.py,sha256=UWFctRQ03suEyASlbSg76dEi0s58tp5ERE-d5A9LuLg,1098
|
62
66
|
vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py,sha256=VvE96WlEqbXCytwUGxLGt8AbuRvu1psF1weydb8MW_4,5431
|
63
67
|
vectordb_bench/backend/clients/redis/cli.py,sha256=tFLXzNyvh_GYUZihqMvj65C5vBKPVVAYIXtbzGaVCcU,2167
|
64
68
|
vectordb_bench/backend/clients/redis/config.py,sha256=xVSVC6xjjAKsiwJuJoLguCGhiiUT9w13Db_Up5ZqljY,1241
|
@@ -80,14 +84,14 @@ vectordb_bench/backend/clients/zilliz_cloud/cli.py,sha256=3_eD3ZG-FeTw1cenhbBFni
|
|
80
84
|
vectordb_bench/backend/clients/zilliz_cloud/config.py,sha256=-Qb50m-Hcz86OcMURU21n61Rz-RpFqKfUsmjna85OR8,909
|
81
85
|
vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py,sha256=B9EUDmK11oQ2GIslVkbRVAitHT-NbRGxQD_Weia-vhY,681
|
82
86
|
vectordb_bench/backend/runner/__init__.py,sha256=mF8YnErTa7MVG37zZb0KFXBSrmMw_afttuiqWcwrVls,228
|
83
|
-
vectordb_bench/backend/runner/mp_runner.py,sha256=
|
87
|
+
vectordb_bench/backend/runner/mp_runner.py,sha256=n8IiRs7JUJGQVXwGlVMdvcpotikF9VsjXGFHMMylsS0,10119
|
84
88
|
vectordb_bench/backend/runner/rate_runner.py,sha256=2coO7qalEh6ZbVKUkyFvip4JWjs1yJM-iiExSrjEp9c,4306
|
85
89
|
vectordb_bench/backend/runner/read_write_runner.py,sha256=CXYBXEEkS1S7-NurdzN5Wh6N0Vx-rprM9Qehk1WKwl8,7822
|
86
90
|
vectordb_bench/backend/runner/serial_runner.py,sha256=Y4Y2mSK8nU3hml7gliiF6BXUaW49sD-Ueci0xn62IL0,10290
|
87
91
|
vectordb_bench/backend/runner/util.py,sha256=tjTFUxth6hNnVrlU82TqkHhfeZo4ymj7WlyK4zFyPTg,522
|
88
92
|
vectordb_bench/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
89
|
-
vectordb_bench/cli/cli.py,sha256=
|
90
|
-
vectordb_bench/cli/vectordbbench.py,sha256=
|
93
|
+
vectordb_bench/cli/cli.py,sha256=1bZzK7uCwAi9ILtvlZiFAAMwJfwQec1HF3RRSpbqxKY,16000
|
94
|
+
vectordb_bench/cli/vectordbbench.py,sha256=EA0x0vKDGIszAKstbnDtJz26U6_Wnl11W07PDukLNzo,1604
|
91
95
|
vectordb_bench/config-files/sample_config.yml,sha256=yw9ZgHczNi9PedNuTVxZKiOTI6AVoQS1h8INNgoDjPk,340
|
92
96
|
vectordb_bench/custom/custom_case.json,sha256=uKo7NJgXDPPLtf_V6y1uc5w1aIcjLp-GCJEYOCty1As,475
|
93
97
|
vectordb_bench/frontend/utils.py,sha256=8eb4I9F0cQdnPQiFX0gMEk1e2fdgultgTKzzY5zS0Q0,489
|
@@ -109,16 +113,16 @@ vectordb_bench/frontend/components/custom/initStyle.py,sha256=ortsoUNqH-vVq9ECiw
|
|
109
113
|
vectordb_bench/frontend/components/get_results/saveAsImage.py,sha256=POaFiwKoCGqrY-zhanWC7-tubE64bV_JjqI4lgIuMts,1459
|
110
114
|
vectordb_bench/frontend/components/run_test/autoRefresh.py,sha256=mjIa43VQQmNjYPkEbOtKNlJ1UfGPcqRKvc2Jh4kx8U0,289
|
111
115
|
vectordb_bench/frontend/components/run_test/caseSelector.py,sha256=ea3u-NDtCX32Au9YkfqGA8mhF6K_Av9HZvp0Mem3C0o,5328
|
112
|
-
vectordb_bench/frontend/components/run_test/dbConfigSetting.py,sha256=
|
116
|
+
vectordb_bench/frontend/components/run_test/dbConfigSetting.py,sha256=k0tGoJokTVvI3zofArNxH9NYUu9Hzo1uyobbZ_h9HfM,2895
|
113
117
|
vectordb_bench/frontend/components/run_test/dbSelector.py,sha256=hzMEIL1DzvpP8xkL6JhELTdcml0ysC70Gw-WLr8vW9A,1123
|
114
118
|
vectordb_bench/frontend/components/run_test/generateTasks.py,sha256=3y8NHtWJMNqoP2SvoWuR7kj84g0OEg68IULebimzz7E,741
|
115
119
|
vectordb_bench/frontend/components/run_test/hideSidebar.py,sha256=vb5kzIMmbMqWX67qFEHek21X4sGO_tPyn_uPqUEtp3Q,234
|
116
120
|
vectordb_bench/frontend/components/run_test/initStyle.py,sha256=osPUgfFfH7rRlVNHSMumvmZxvKWlLxmZiNqgnMiUJEU,723
|
117
121
|
vectordb_bench/frontend/components/run_test/submitTask.py,sha256=VZjkopkCBNhqLwGqsoM0hbPEeF6Q5UOQcdFUaegerxc,4094
|
118
122
|
vectordb_bench/frontend/components/tables/data.py,sha256=5DdnC64BB7Aj2z9acht2atsPB4NabzQCZKALfIUnqtQ,1233
|
119
|
-
vectordb_bench/frontend/config/dbCaseConfigs.py,sha256=
|
123
|
+
vectordb_bench/frontend/config/dbCaseConfigs.py,sha256=DQrSuBVuTCjwS_I1hVNTnYygDu6Zkka7PLfLi7TNN3E,51023
|
120
124
|
vectordb_bench/frontend/config/dbPrices.py,sha256=10aBKjVcEg8y7TPSda28opmBM1KmXNrvbU9WM_BsZcE,176
|
121
|
-
vectordb_bench/frontend/config/styles.py,sha256=
|
125
|
+
vectordb_bench/frontend/config/styles.py,sha256=y-vYXCF4_o0-88BNzbKNKvfhvVxmz8BSr4v_E_Qv37E,2643
|
122
126
|
vectordb_bench/frontend/pages/concurrent.py,sha256=bvoSafRSIsRzBQkI3uBwwrdg8jnhRUQG-epZbrJhGiE,2082
|
123
127
|
vectordb_bench/frontend/pages/custom.py,sha256=j7oJ2FHBv5O50D7YbzXTLRuIDgwkGt0iEd0FRHHkYLw,2436
|
124
128
|
vectordb_bench/frontend/pages/quries_per_dollar.py,sha256=BDukiFwxyqQK_btCSsRR5D_a17PMu0yI8Muw3eRLz6Y,2461
|
@@ -142,9 +146,9 @@ vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json
|
|
142
146
|
vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json,sha256=-Mdm4By65XDRCrmVOCF8yQXjcZtH4Xo4shcjoDoBUKU,18293
|
143
147
|
vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json,sha256=77XlHT5zM_K7mG5HfDQKwXZnSCuR37VUbt6-P3J_amI,15737
|
144
148
|
vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json,sha256=TualfJ0664Hs-vdIW68bdkqAEYyzotXmu2P0yIN-GHk,42526
|
145
|
-
vectordb_bench-0.0.27.dist-info/licenses/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
|
146
|
-
vectordb_bench-0.0.
|
147
|
-
vectordb_bench-0.0.
|
148
|
-
vectordb_bench-0.0.27.dist-info/entry_points.txt,sha256=Qzw6gVx96ui8esG21H6yHsI6nboEohRmV424TYhQNrA,113
|
149
|
-
vectordb_bench-0.0.27.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
|
150
|
-
vectordb_bench-0.0.27.dist-info/RECORD,,
|
149
|
+
vectordb_bench-0.0.29.dist-info/licenses/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
|
150
|
+
vectordb_bench-0.0.29.dist-info/METADATA,sha256=8ekrKUsItuE-dEvCBGOk1ktXNF19qSw8Qat9FRGG-o8,38448
|
151
|
+
vectordb_bench-0.0.29.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
152
|
+
vectordb_bench-0.0.29.dist-info/entry_points.txt,sha256=Qzw6gVx96ui8esG21H6yHsI6nboEohRmV424TYhQNrA,113
|
153
|
+
vectordb_bench-0.0.29.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
|
154
|
+
vectordb_bench-0.0.29.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|