vectordb-bench 0.0.27__py3-none-any.whl → 0.0.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/backend/clients/__init__.py +18 -2
- vectordb_bench/backend/clients/api.py +1 -0
- vectordb_bench/backend/clients/lancedb/cli.py +92 -0
- vectordb_bench/backend/clients/lancedb/config.py +103 -0
- vectordb_bench/backend/clients/lancedb/lancedb.py +91 -0
- vectordb_bench/cli/cli.py +3 -1
- vectordb_bench/cli/vectordbbench.py +2 -0
- vectordb_bench/frontend/config/dbCaseConfigs.py +125 -0
- vectordb_bench/frontend/config/styles.py +1 -0
- vectordb_bench/models.py +15 -5
- {vectordb_bench-0.0.27.dist-info → vectordb_bench-0.0.28.dist-info}/METADATA +8 -5
- {vectordb_bench-0.0.27.dist-info → vectordb_bench-0.0.28.dist-info}/RECORD +16 -13
- {vectordb_bench-0.0.27.dist-info → vectordb_bench-0.0.28.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.27.dist-info → vectordb_bench-0.0.28.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.27.dist-info → vectordb_bench-0.0.28.dist-info}/licenses/LICENSE +0 -0
- {vectordb_bench-0.0.27.dist-info → vectordb_bench-0.0.28.dist-info}/top_level.txt +0 -0
@@ -45,9 +45,10 @@ class DB(Enum):
|
|
45
45
|
TiDB = "TiDB"
|
46
46
|
Clickhouse = "Clickhouse"
|
47
47
|
Vespa = "Vespa"
|
48
|
+
LanceDB = "LanceDB"
|
48
49
|
|
49
50
|
@property
|
50
|
-
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901
|
51
|
+
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915
|
51
52
|
"""Import while in use"""
|
52
53
|
if self == DB.Milvus:
|
53
54
|
from .milvus.milvus import Milvus
|
@@ -164,11 +165,16 @@ class DB(Enum):
|
|
164
165
|
|
165
166
|
return Vespa
|
166
167
|
|
168
|
+
if self == DB.LanceDB:
|
169
|
+
from .lancedb.lancedb import LanceDB
|
170
|
+
|
171
|
+
return LanceDB
|
172
|
+
|
167
173
|
msg = f"Unknown DB: {self.name}"
|
168
174
|
raise ValueError(msg)
|
169
175
|
|
170
176
|
@property
|
171
|
-
def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901
|
177
|
+
def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901, PLR0915
|
172
178
|
"""Import while in use"""
|
173
179
|
if self == DB.Milvus:
|
174
180
|
from .milvus.config import MilvusConfig
|
@@ -285,6 +291,11 @@ class DB(Enum):
|
|
285
291
|
|
286
292
|
return VespaConfig
|
287
293
|
|
294
|
+
if self == DB.LanceDB:
|
295
|
+
from .lancedb.config import LanceDBConfig
|
296
|
+
|
297
|
+
return LanceDBConfig
|
298
|
+
|
288
299
|
msg = f"Unknown DB: {self.name}"
|
289
300
|
raise ValueError(msg)
|
290
301
|
|
@@ -382,6 +393,11 @@ class DB(Enum):
|
|
382
393
|
|
383
394
|
return VespaHNSWConfig
|
384
395
|
|
396
|
+
if self == DB.LanceDB:
|
397
|
+
from .lancedb.config import _lancedb_case_config
|
398
|
+
|
399
|
+
return _lancedb_case_config.get(index_type)
|
400
|
+
|
385
401
|
# DB.Pinecone, DB.Chroma, DB.Redis
|
386
402
|
return EmptyDBCaseConfig
|
387
403
|
|
@@ -0,0 +1,92 @@
|
|
1
|
+
from typing import Annotated, Unpack
|
2
|
+
|
3
|
+
import click
|
4
|
+
from pydantic import SecretStr
|
5
|
+
|
6
|
+
from ....cli.cli import (
|
7
|
+
CommonTypedDict,
|
8
|
+
cli,
|
9
|
+
click_parameter_decorators_from_typed_dict,
|
10
|
+
run,
|
11
|
+
)
|
12
|
+
from .. import DB
|
13
|
+
from ..api import IndexType
|
14
|
+
|
15
|
+
|
16
|
+
class LanceDBTypedDict(CommonTypedDict):
    """Command-line options accepted by the LanceDB benchmark commands.

    Extends ``CommonTypedDict`` with the two LanceDB connection options.
    """

    # Required connection string, exposed on the command line as ``--uri``.
    uri: Annotated[
        str,
        click.option(
            "--uri",
            type=str,
            help="URI connection string",
            required=True,
        ),
    ]
    # Optional credential, exposed on the command line as ``--token``.
    token: Annotated[
        str | None,
        click.option(
            "--token",
            type=str,
            help="Authentication token",
            required=False,
        ),
    ]
|
25
|
+
|
26
|
+
|
27
|
+
@cli.command()
@click_parameter_decorators_from_typed_dict(LanceDBTypedDict)
def LanceDB(**parameters: Unpack[LanceDBTypedDict]):
    """Run the benchmark against LanceDB using the IndexType.NONE case config."""
    # Imported lazily, matching the other LanceDB commands in this module.
    from .config import LanceDBConfig, _lancedb_case_config

    # Only wrap the token in a SecretStr when one was actually supplied.
    raw_token = parameters.get("token")

    run(
        db=DB.LanceDB,
        db_config=LanceDBConfig(
            db_label=parameters["db_label"],
            uri=parameters["uri"],
            token=SecretStr(raw_token) if raw_token else None,
        ),
        # Key the lookup by the enum member, as the sibling commands do, instead of
        # the bare string "NONE"; direct indexing fails with a clear KeyError rather
        # than "NoneType is not callable" if the entry is ever missing.
        db_case_config=_lancedb_case_config[IndexType.NONE](),
        **parameters,
    )
|
42
|
+
|
43
|
+
|
44
|
+
@cli.command()
@click_parameter_decorators_from_typed_dict(LanceDBTypedDict)
def LanceDBAutoIndex(**parameters: Unpack[LanceDBTypedDict]):
    # Benchmarks LanceDB with the case config registered for IndexType.AUTOINDEX.
    # (Documented with comments on purpose: click would show a docstring as the
    # command's help text, which the original command does not have.)
    from .config import LanceDBConfig, _lancedb_case_config

    # The token is optional; wrap it only when a non-empty value was given.
    secret_token = SecretStr(parameters["token"]) if parameters.get("token") else None
    connection = LanceDBConfig(
        db_label=parameters["db_label"],
        uri=parameters["uri"],
        token=secret_token,
    )
    case_config_cls = _lancedb_case_config.get(IndexType.AUTOINDEX)

    run(
        db=DB.LanceDB,
        db_config=connection,
        db_case_config=case_config_cls(),
        **parameters,
    )
|
59
|
+
|
60
|
+
|
61
|
+
@cli.command()
@click_parameter_decorators_from_typed_dict(LanceDBTypedDict)
def LanceDBIVFPQ(**parameters: Unpack[LanceDBTypedDict]):
    # Benchmarks LanceDB with the case config registered for IndexType.IVFPQ.
    # (Documented with comments on purpose: click would show a docstring as the
    # command's help text, which the original command does not have.)
    from .config import LanceDBConfig, _lancedb_case_config

    # The token is optional; wrap it only when a non-empty value was given.
    secret_token = SecretStr(parameters["token"]) if parameters.get("token") else None
    connection = LanceDBConfig(
        db_label=parameters["db_label"],
        uri=parameters["uri"],
        token=secret_token,
    )
    case_config_cls = _lancedb_case_config.get(IndexType.IVFPQ)

    run(
        db=DB.LanceDB,
        db_config=connection,
        db_case_config=case_config_cls(),
        **parameters,
    )
|
76
|
+
|
77
|
+
|
78
|
+
@cli.command()
@click_parameter_decorators_from_typed_dict(LanceDBTypedDict)
def LanceDBHNSW(**parameters: Unpack[LanceDBTypedDict]):
    # Benchmarks LanceDB with the case config registered for IndexType.HNSW.
    # (Documented with comments on purpose: click would show a docstring as the
    # command's help text, which the original command does not have.)
    from .config import LanceDBConfig, _lancedb_case_config

    # The token is optional; wrap it only when a non-empty value was given.
    secret_token = SecretStr(parameters["token"]) if parameters.get("token") else None
    connection = LanceDBConfig(
        db_label=parameters["db_label"],
        uri=parameters["uri"],
        token=secret_token,
    )
    case_config_cls = _lancedb_case_config.get(IndexType.HNSW)

    run(
        db=DB.LanceDB,
        db_config=connection,
        db_case_config=case_config_cls(),
        **parameters,
    )
|
@@ -0,0 +1,103 @@
|
|
1
|
+
from pydantic import BaseModel, SecretStr
|
2
|
+
|
3
|
+
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
|
4
|
+
|
5
|
+
|
6
|
+
class LanceDBConfig(DBConfig):
    """Connection settings for a LanceDB database."""

    db_label: str
    uri: str
    # Optional credential; kept wrapped in a SecretStr until to_dict() is called.
    token: SecretStr | None = None

    def to_dict(self) -> dict:
        """Return the URI and the unwrapped token (or None) as a plain dict."""
        plain_token = None
        if self.token:
            plain_token = self.token.get_secret_value()
        return {"uri": self.uri, "token": plain_token}
|
18
|
+
|
19
|
+
|
20
|
+
class LanceDBIndexConfig(BaseModel, DBCaseConfig):
    """Index settings for a LanceDB benchmark case.

    The default ``index`` is ``IndexType.IVFPQ``; subclasses override ``index``
    and, where needed, ``index_param``.
    """

    index: IndexType = IndexType.IVFPQ
    metric_type: MetricType = MetricType.L2
    # 0 means "not set": index_param() then leaves the key out entirely.
    num_partitions: int = 0
    num_sub_vectors: int = 0
    nbits: int = 8  # Must be 4 or 8
    sample_rate: int = 256
    max_iterations: int = 50

    def index_param(self) -> dict:
        """Build the keyword arguments handed to the table's ``create_index``.

        Returns:
            A dict with ``metric``, ``num_bits``, ``sample_rate`` and
            ``max_iterations``, plus ``num_partitions`` / ``num_sub_vectors``
            when those were set to a positive value.

        Raises:
            ValueError: if ``index`` or ``metric_type`` is not supported.
        """
        if self.index not in [
            IndexType.IVFPQ,
            IndexType.HNSW,
            IndexType.AUTOINDEX,
            IndexType.NONE,
        ]:
            msg = f"Index type {self.index} is not supported for LanceDB!"
            raise ValueError(msg)

        # See https://lancedb.github.io/lancedb/python/python/#lancedb.table.Table.create_index
        params = {
            "metric": self.parse_metric(),
            "num_bits": self.nbits,
            "sample_rate": self.sample_rate,
            "max_iterations": self.max_iterations,
        }

        if self.num_partitions > 0:
            params["num_partitions"] = self.num_partitions
        if self.num_sub_vectors > 0:
            params["num_sub_vectors"] = self.num_sub_vectors

        return params

    def search_param(self) -> dict:
        """Return the search-time parameters; none are defined, so this is empty."""
        # Previously the body was `pass`, which returned None despite the
        # `-> dict` annotation; return an empty dict so the result is always a dict.
        return {}

    def parse_metric(self) -> str:
        """Translate ``metric_type`` into the metric string used in ``index_param``.

        Raises:
            ValueError: if ``metric_type`` is not L2, COSINE, IP or DP.
        """
        if self.metric_type in [MetricType.L2, MetricType.COSINE]:
            return self.metric_type.value.lower()
        if self.metric_type in [MetricType.IP, MetricType.DP]:
            return "dot"
        msg = f"Metric type {self.metric_type} is not supported for LanceDB!"
        raise ValueError(msg)
|
64
|
+
|
65
|
+
|
66
|
+
class LanceDBNoIndexConfig(LanceDBIndexConfig):
    """Case config whose index type is ``IndexType.NONE``."""

    index: IndexType = IndexType.NONE

    def index_param(self) -> dict:
        """Return an empty dict: this config supplies no index arguments."""
        no_params: dict = {}
        return no_params
|
71
|
+
|
72
|
+
|
73
|
+
class LanceDBAutoIndexConfig(LanceDBIndexConfig):
    """Case config whose index type is ``IndexType.AUTOINDEX``."""

    index: IndexType = IndexType.AUTOINDEX

    def index_param(self) -> dict:
        """Return an empty dict, so no explicit index arguments are supplied."""
        # NOTE(review): this also drops the metric derived from metric_type;
        # confirm that relying on the library's default metric is intended.
        no_params: dict = {}
        return no_params
|
78
|
+
|
79
|
+
|
80
|
+
class LanceDBHNSWIndexConfig(LanceDBIndexConfig):
    """Case config for ``IndexType.HNSW``, built as an ``IVF_HNSW_SQ`` index."""

    index: IndexType = IndexType.HNSW
    # 0 means "not set": the key is then omitted from index_param().
    m: int = 0
    ef_construction: int = 0

    def index_param(self) -> dict:
        """Extend the base index arguments with the HNSW-specific ones."""
        hnsw_params = LanceDBIndexConfig.index_param(self)

        # See https://lancedb.github.io/lancedb/python/python/#lancedb.index.HnswSq
        hnsw_params["index_type"] = "IVF_HNSW_SQ"
        optional_settings = (("m", self.m), ("ef_construction", self.ef_construction))
        for key, value in optional_settings:
            # Only forward values that were explicitly set to something positive.
            if value > 0:
                hnsw_params[key] = value

        return hnsw_params
|
96
|
+
|
97
|
+
|
98
|
+
# Maps each supported index type to the case-config class that describes it.
_lancedb_case_config = {
    IndexType.IVFPQ: LanceDBIndexConfig,  # base class; its default index is IVFPQ
    IndexType.AUTOINDEX: LanceDBAutoIndexConfig,
    IndexType.HNSW: LanceDBHNSWIndexConfig,
    IndexType.NONE: LanceDBNoIndexConfig,
}
|
@@ -0,0 +1,91 @@
|
|
1
|
+
import logging
|
2
|
+
from contextlib import contextmanager
|
3
|
+
|
4
|
+
import lancedb
|
5
|
+
import pyarrow as pa
|
6
|
+
from lancedb.pydantic import LanceModel
|
7
|
+
|
8
|
+
from ..api import IndexType, VectorDB
|
9
|
+
from .config import LanceDBConfig, LanceDBIndexConfig
|
10
|
+
|
11
|
+
log = logging.getLogger(__name__)
|
12
|
+
|
13
|
+
|
14
|
+
class VectorModel(LanceModel):
    """Row model with an integer ``id`` and a ``vector`` of floats."""

    id: int
    vector: list[float]
|
17
|
+
|
18
|
+
|
19
|
+
class LanceDB(VectorDB):
    """VectorDB client that runs benchmark operations against a LanceDB table."""

    def __init__(
        self,
        dim: int,
        db_config: dict,
        db_case_config: LanceDBIndexConfig,
        collection_name: str = "vector_bench_test",
        drop_old: bool = False,
        **kwargs,
    ):
        """Record the settings and make sure the benchmark table exists.

        Args:
            dim: Length of each stored vector; used as the fixed list size of the
                ``vector`` column.
            db_config: Mapping with at least a ``"uri"`` key. It is subscripted
                here, so it is annotated as ``dict`` rather than ``LanceDBConfig``.
            db_case_config: Index settings consulted later by ``optimize``.
            collection_name: Name of the LanceDB table to use.
            drop_old: When true, try to drop an existing table of that name first.
            **kwargs: Accepted and ignored.
        """
        self.name = "LanceDB"
        self.db_config = db_config
        self.case_config = db_case_config
        self.table_name = collection_name
        self.dim = dim
        self.uri = db_config["uri"]
        # NOTE(review): only "uri" is read from db_config; a "token" entry, if
        # present, is not passed to lancedb.connect — confirm this is intended.

        db = lancedb.connect(self.uri)

        if drop_old:
            try:
                db.drop_table(self.table_name)
            except Exception as e:
                # Best effort: a failed drop is logged and the run continues.
                log.warning(f"Failed to drop table {self.table_name}: {e}")

        try:
            db.open_table(self.table_name)
        except Exception:
            # The table could not be opened, so create it with the fixed schema.
            schema = pa.schema(
                [pa.field("id", pa.int64()), pa.field("vector", pa.list_(pa.float64(), list_size=self.dim))]
            )
            db.create_table(self.table_name, schema=schema, mode="overwrite")

    @contextmanager
    def init(self):
        """Open a connection and the table for the duration of the ``with`` block.

        ``self.db`` and ``self.table`` are reset to ``None`` on exit, including
        when the body of the ``with`` block (or opening the table) raises.
        """
        self.db = lancedb.connect(self.uri)
        try:
            self.table = self.db.open_table(self.table_name)
            yield
        finally:
            # Previously skipped on error, leaving stale handles on the instance.
            self.db = None
            self.table = None

    def insert_embeddings(
        self,
        embeddings: list[list[float]],
        metadata: list[int],
    ) -> tuple[int, Exception | None]:
        """Add one row per (id, vector) pair to the table.

        Returns:
            ``(rows_written, None)`` on success, or ``(0, error)`` if the insert
            raised.
        """
        try:
            data = [{"id": meta, "vector": emb} for meta, emb in zip(metadata, embeddings, strict=False)]
            self.table.add(data)
            # The zip is non-strict, so it stops at the shorter input; report the
            # number of rows actually built instead of len(metadata).
            return len(data), None
        except Exception as e:
            log.warning(f"Failed to insert data into LanceDB table ({self.table_name}), error: {e}")
            return 0, e

    def search_embedding(
        self,
        query: list[float],
        k: int = 100,
        filters: dict | None = None,
    ) -> list[int]:
        """Return the ids of up to ``k`` rows found by a vector search for ``query``.

        When ``filters`` is given, only rows with ``id >= filters["id"]`` are
        considered, applied as a pre-filter.
        """
        search = self.table.search(query)
        if filters:
            search = search.where(f"id >= {filters['id']}", prefilter=True)
        results = search.limit(k).to_list()
        return [int(result["id"]) for result in results]

    def optimize(self, data_size: int | None = None):
        """Create the configured index, unless the case config's index is NONE.

        ``data_size`` is accepted but not used.
        """
        # NOTE(review): `self.table` is tested for truthiness, not `is not None`;
        # if the table type defines __len__, an empty table would skip indexing —
        # confirm that is intended.
        if self.table and hasattr(self, "case_config") and self.case_config.index != IndexType.NONE:
            log.info(f"Creating index for LanceDB table ({self.table_name})")
            self.table.create_index(**self.case_config.index_param())
            # Better recall with IVF_PQ (though still bad) but breaks HNSW: https://github.com/lancedb/lancedb/issues/2369
            if self.case_config.index in (IndexType.IVFPQ, IndexType.AUTOINDEX):
                self.table.optimize()
|
vectordb_bench/cli/cli.py
CHANGED
@@ -405,6 +405,7 @@ class CommonTypedDict(TypedDict):
|
|
405
405
|
show_default=True,
|
406
406
|
),
|
407
407
|
]
|
408
|
+
task_label: Annotated[str, click.option("--task-label", help="Task label")]
|
408
409
|
|
409
410
|
|
410
411
|
class HNSWBaseTypedDict(TypedDict):
|
@@ -499,10 +500,11 @@ def run(
|
|
499
500
|
parameters["search_concurrent"],
|
500
501
|
),
|
501
502
|
)
|
503
|
+
task_label = parameters["task_label"]
|
502
504
|
|
503
505
|
log.info(f"Task:\n{pformat(task)}\n")
|
504
506
|
if not parameters["dry_run"]:
|
505
|
-
benchmark_runner.run([task])
|
507
|
+
benchmark_runner.run([task], task_label)
|
506
508
|
time.sleep(5)
|
507
509
|
if global_result_future:
|
508
510
|
wait([global_result_future])
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from ..backend.clients.alloydb.cli import AlloyDBScaNN
|
2
2
|
from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
|
3
3
|
from ..backend.clients.clickhouse.cli import Clickhouse
|
4
|
+
from ..backend.clients.lancedb.cli import LanceDB
|
4
5
|
from ..backend.clients.mariadb.cli import MariaDBHNSW
|
5
6
|
from ..backend.clients.memorydb.cli import MemoryDB
|
6
7
|
from ..backend.clients.milvus.cli import MilvusAutoIndex
|
@@ -33,6 +34,7 @@ cli.add_command(MariaDBHNSW)
|
|
33
34
|
cli.add_command(TiDB)
|
34
35
|
cli.add_command(Clickhouse)
|
35
36
|
cli.add_command(Vespa)
|
37
|
+
cli.add_command(LanceDB)
|
36
38
|
|
37
39
|
|
38
40
|
if __name__ == "__main__":
|
@@ -1491,6 +1491,127 @@ VespaLoadingConfig = [
|
|
1491
1491
|
]
|
1492
1492
|
VespaPerformanceConfig = VespaLoadingConfig
|
1493
1493
|
|
1494
|
+
def _lancedb_ivfpq_or_hnsw_selected(config):
    """Return True when the chosen index type is IVFPQ or HNSW."""
    selected = config.get(CaseConfigParamType.IndexType, None)
    return selected in (IndexType.IVFPQ.value, IndexType.HNSW.value)


def _lancedb_hnsw_selected(config):
    """Return True when the chosen index type is HNSW."""
    selected = config.get(CaseConfigParamType.IndexType, None)
    return selected == IndexType.HNSW.value


# Index type selector; the remaining inputs are shown depending on this choice.
CaseConfigParamInput_IndexType_LanceDB = CaseConfigInput(
    label=CaseConfigParamType.IndexType,
    inputHelp="AUTOINDEX = IVFPQ with default parameters",
    inputType=InputType.Option,
    inputConfig={
        "options": [
            IndexType.NONE.value,
            IndexType.AUTOINDEX.value,
            IndexType.IVFPQ.value,
            IndexType.HNSW.value,
        ],
    },
)

# Inputs shown when IVFPQ or HNSW is selected.
CaseConfigParamInput_num_partitions_LanceDB = CaseConfigInput(
    label=CaseConfigParamType.num_partitions,
    displayLabel="Number of Partitions",
    inputHelp="Number of partitions (clusters) for IVF_PQ. Default (when 0): sqrt(num_rows)",
    inputType=InputType.Number,
    inputConfig={"min": 0, "max": 10000, "value": 0},
    isDisplayed=_lancedb_ivfpq_or_hnsw_selected,
)

CaseConfigParamInput_num_sub_vectors_LanceDB = CaseConfigInput(
    label=CaseConfigParamType.num_sub_vectors,
    displayLabel="Number of Sub-vectors",
    inputHelp="Number of sub-vectors for PQ. Default (when 0): dim/16 or dim/8",
    inputType=InputType.Number,
    inputConfig={"min": 0, "max": 1000, "value": 0},
    isDisplayed=_lancedb_ivfpq_or_hnsw_selected,
)

CaseConfigParamInput_num_bits_LanceDB = CaseConfigInput(
    label=CaseConfigParamType.nbits,
    displayLabel="Number of Bits",
    inputHelp="Number of bits per sub-vector.",
    inputType=InputType.Option,
    inputConfig={"options": [4, 8]},
    isDisplayed=_lancedb_ivfpq_or_hnsw_selected,
)

CaseConfigParamInput_sample_rate_LanceDB = CaseConfigInput(
    label=CaseConfigParamType.sample_rate,
    displayLabel="Sample Rate",
    inputHelp="Sample rate for training. Higher values are more accurate but slower",
    inputType=InputType.Number,
    inputConfig={"min": 16, "max": 1024, "value": 256},
    isDisplayed=_lancedb_ivfpq_or_hnsw_selected,
)

CaseConfigParamInput_max_iterations_LanceDB = CaseConfigInput(
    label=CaseConfigParamType.max_iterations,
    displayLabel="Max Iterations",
    inputHelp="Maximum iterations for k-means clustering",
    inputType=InputType.Number,
    inputConfig={"min": 10, "max": 200, "value": 50},
    isDisplayed=_lancedb_ivfpq_or_hnsw_selected,
)

# Inputs shown only when HNSW is selected.
CaseConfigParamInput_m_LanceDB = CaseConfigInput(
    label=CaseConfigParamType.m,
    displayLabel="m",
    inputHelp="m parameter in HNSW",
    inputType=InputType.Number,
    inputConfig={"min": 0, "max": 1000, "value": 0},
    isDisplayed=_lancedb_hnsw_selected,
)

CaseConfigParamInput_ef_construction_LanceDB = CaseConfigInput(
    label=CaseConfigParamType.ef_construction,
    displayLabel="ef_construction",
    inputHelp="ef_construction parameter in HNSW",
    inputType=InputType.Number,
    inputConfig={"min": 0, "max": 1000, "value": 0},
    isDisplayed=_lancedb_hnsw_selected,
)

LanceDBLoadConfig = [
    CaseConfigParamInput_IndexType_LanceDB,
    CaseConfigParamInput_num_partitions_LanceDB,
    CaseConfigParamInput_num_sub_vectors_LanceDB,
    CaseConfigParamInput_num_bits_LanceDB,
    CaseConfigParamInput_sample_rate_LanceDB,
    CaseConfigParamInput_max_iterations_LanceDB,
    CaseConfigParamInput_m_LanceDB,
    CaseConfigParamInput_ef_construction_LanceDB,
]

# The performance case reuses the very same list object as the load case.
LanceDBPerformanceConfig = LanceDBLoadConfig
|
1614
|
+
|
1494
1615
|
CASE_CONFIG_MAP = {
|
1495
1616
|
DB.Milvus: {
|
1496
1617
|
CaseLabel.Load: MilvusLoadConfig,
|
@@ -1551,4 +1672,8 @@ CASE_CONFIG_MAP = {
|
|
1551
1672
|
CaseLabel.Load: VespaLoadingConfig,
|
1552
1673
|
CaseLabel.Performance: VespaPerformanceConfig,
|
1553
1674
|
},
|
1675
|
+
DB.LanceDB: {
|
1676
|
+
CaseLabel.Load: LanceDBLoadConfig,
|
1677
|
+
CaseLabel.Performance: LanceDBPerformanceConfig,
|
1678
|
+
},
|
1554
1679
|
}
|
@@ -49,6 +49,7 @@ DB_TO_ICON = {
|
|
49
49
|
DB.AWSOpenSearch: "https://assets.zilliz.com/opensearch_1eee37584e.jpeg",
|
50
50
|
DB.TiDB: "https://img2.pingcap.com/forms/3/d/3d7fd5f9767323d6f037795704211ac44b4923d6.png",
|
51
51
|
DB.Vespa: "https://vespa.ai/vespa-content/uploads/2025/01/Vespa-symbol-green-rgb.png.webp",
|
52
|
+
DB.LanceDB: "https://raw.githubusercontent.com/lancedb/lancedb/main/docs/src/assets/logo.png",
|
52
53
|
}
|
53
54
|
|
54
55
|
# RedisCloud color: #0D6EFD
|
vectordb_bench/models.py
CHANGED
@@ -12,6 +12,7 @@ from .backend.clients import (
|
|
12
12
|
DB,
|
13
13
|
DBCaseConfig,
|
14
14
|
DBConfig,
|
15
|
+
EmptyDBCaseConfig,
|
15
16
|
)
|
16
17
|
from .base import BaseModel
|
17
18
|
from .metric import Metric
|
@@ -96,6 +97,9 @@ class CaseConfigParamType(Enum):
|
|
96
97
|
maxNumPrefetchDatasets = "max_num_prefetch_datasets"
|
97
98
|
storage_engine = "storage_engine"
|
98
99
|
max_cache_size = "max_cache_size"
|
100
|
+
num_partitions = "num_partitions"
|
101
|
+
num_sub_vectors = "num_sub_vectors"
|
102
|
+
sample_rate = "sample_rate"
|
99
103
|
|
100
104
|
# mongodb params
|
101
105
|
mongodb_quantization_type = "quantization"
|
@@ -247,13 +251,19 @@ class TestResult(BaseModel):
|
|
247
251
|
test_result["task_label"] = test_result["run_id"]
|
248
252
|
|
249
253
|
for case_result in test_result["results"]:
|
250
|
-
task_config = case_result
|
251
|
-
db = DB(task_config
|
254
|
+
task_config = case_result["task_config"]
|
255
|
+
db = DB(task_config["db"])
|
252
256
|
|
253
257
|
task_config["db_config"] = db.config_cls(**task_config["db_config"])
|
254
|
-
|
255
|
-
|
256
|
-
|
258
|
+
|
259
|
+
# Safely instantiate DBCaseConfig (fallback to EmptyDBCaseConfig on None)
|
260
|
+
raw_case_cfg = task_config.get("db_case_config") or {}
|
261
|
+
index_value = raw_case_cfg.get("index", None)
|
262
|
+
try:
|
263
|
+
task_config["db_case_config"] = db.case_config_cls(index_type=index_value)(**raw_case_cfg)
|
264
|
+
except Exception:
|
265
|
+
log.exception(f"Couldn't get class for index '{index_value}' ({full_path})")
|
266
|
+
task_config["db_case_config"] = EmptyDBCaseConfig(**raw_case_cfg)
|
257
267
|
|
258
268
|
case_result["task_config"] = task_config
|
259
269
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.28
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -52,6 +52,7 @@ Requires-Dist: mariadb; extra == "all"
|
|
52
52
|
Requires-Dist: PyMySQL; extra == "all"
|
53
53
|
Requires-Dist: clickhouse-connect; extra == "all"
|
54
54
|
Requires-Dist: pyvespa; extra == "all"
|
55
|
+
Requires-Dist: lancedb; extra == "all"
|
55
56
|
Provides-Extra: qdrant
|
56
57
|
Requires-Dist: qdrant-client; extra == "qdrant"
|
57
58
|
Provides-Extra: pinecone
|
@@ -87,6 +88,8 @@ Provides-Extra: clickhouse
|
|
87
88
|
Requires-Dist: clickhouse-connect; extra == "clickhouse"
|
88
89
|
Provides-Extra: vespa
|
89
90
|
Requires-Dist: pyvespa; extra == "vespa"
|
91
|
+
Provides-Extra: lancedb
|
92
|
+
Requires-Dist: lancedb; extra == "lancedb"
|
90
93
|
Dynamic: license-file
|
91
94
|
|
92
95
|
# VectorDBBench: A Benchmark Tool for VectorDB
|
@@ -358,13 +361,13 @@ pip install -e '.[pinecone]'
|
|
358
361
|
```
|
359
362
|
### Run test server
|
360
363
|
```
|
361
|
-
|
364
|
+
python -m vectordb_bench
|
362
365
|
```
|
363
366
|
|
364
367
|
OR:
|
365
368
|
|
366
369
|
```shell
|
367
|
-
|
370
|
+
init_bench
|
368
371
|
```
|
369
372
|
|
370
373
|
OR:
|
@@ -381,13 +384,13 @@ After reopen the repository in container, run `python -m vectordb_bench` in the
|
|
381
384
|
|
382
385
|
### Check coding styles
|
383
386
|
```shell
|
384
|
-
|
387
|
+
make lint
|
385
388
|
```
|
386
389
|
|
387
390
|
To fix the coding styles automatically
|
388
391
|
|
389
392
|
```shell
|
390
|
-
|
393
|
+
make format
|
391
394
|
```
|
392
395
|
|
393
396
|
## How does it work?
|
@@ -4,7 +4,7 @@ vectordb_bench/base.py,sha256=AgavIF0P9ku_RmCRk1KKziba-wI4ZpA2aJvjJzNhRSs,129
|
|
4
4
|
vectordb_bench/interface.py,sha256=XaCjTgUeI17uVjsgOauPeVlkvnkuCyQOWyOaWhrgCt8,9811
|
5
5
|
vectordb_bench/log_util.py,sha256=wDNaU_JBBOfKi_Z4vq7LDa0kOlLjoNNzDX3VZQn_Dxo,3239
|
6
6
|
vectordb_bench/metric.py,sha256=pj-AxQHyIRHTaJY-wTIkTbC6TqEqMzt3kcEmMWEv71w,2063
|
7
|
-
vectordb_bench/models.py,sha256=
|
7
|
+
vectordb_bench/models.py,sha256=aQPO6sLFtNL0CU0JySZqrZIQ5SdckCj5SOI7EfyaLNM,11995
|
8
8
|
vectordb_bench/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
9
|
vectordb_bench/backend/assembler.py,sha256=6GInRT7yBgfTaIPmo-XMkYX4pA8PJQmjMQInynwaunE,2047
|
10
10
|
vectordb_bench/backend/cases.py,sha256=obDdY6g3p9Z2fog7qDwLLDuRMwo3LGQKMHsP66QZd2M,16296
|
@@ -13,8 +13,8 @@ vectordb_bench/backend/dataset.py,sha256=lH2Q01AEJxA-sYfZHzH2BM019mwuy9mB_i0VLhI
|
|
13
13
|
vectordb_bench/backend/result_collector.py,sha256=mpROVdZ-HChKBVyMV5TZ5v7YGRb69bvfT7Gezn5F5sY,819
|
14
14
|
vectordb_bench/backend/task_runner.py,sha256=vlaXB0_25-G9w1Lj-F0SrvJzhXT7ceDWGIb2aKRXukU,11488
|
15
15
|
vectordb_bench/backend/utils.py,sha256=R6THuJdZhiQYSSJTqv0Uegl2B20taV_QjwvFrun2yxE,1949
|
16
|
-
vectordb_bench/backend/clients/__init__.py,sha256=
|
17
|
-
vectordb_bench/backend/clients/api.py,sha256=
|
16
|
+
vectordb_bench/backend/clients/__init__.py,sha256=4P4Y7qOIYBJqJENsfMNzD5L0C651ypcPr05M1-ph0LU,10549
|
17
|
+
vectordb_bench/backend/clients/api.py,sha256=3AfO-EPNzosaIBfYX3U9HeOMO7Uw0muOZ0x4cqqSH34,6534
|
18
18
|
vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py,sha256=7yPYaWoHeHNxDMtpReGXsdEPFD1e4vQblFor7TmLq5o,770
|
19
19
|
vectordb_bench/backend/clients/aliyun_elasticsearch/config.py,sha256=d9RCgfCgauKvy6z9ig_wBormgwiGtkh8POyoHloHnJA,505
|
20
20
|
vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py,sha256=rwa4rtbbP2Kaczh7Bf0bc_lE_sGG5w9PhtfdFu7rQNs,13237
|
@@ -33,6 +33,9 @@ vectordb_bench/backend/clients/clickhouse/clickhouse.py,sha256=nRTFE5KQn-_juKrtI
|
|
33
33
|
vectordb_bench/backend/clients/clickhouse/config.py,sha256=Bd2fqpaJU5YcPKTNOL0mzEFWpaoVyxVt_21-Jb_tHKk,2659
|
34
34
|
vectordb_bench/backend/clients/elastic_cloud/config.py,sha256=_5Cz3__CbMU7zCizkhK1pGhH3TLJacn8efVueUZ0lnQ,1573
|
35
35
|
vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py,sha256=FSslLDH2Yi9ZdUwaCbKC_IXxFbMvW-L1xB3YMU08MVI,5448
|
36
|
+
vectordb_bench/backend/clients/lancedb/cli.py,sha256=j5eqb-_CSWF1rdxAj2Byut6albHEj0JF51wCruaJsu8,2688
|
37
|
+
vectordb_bench/backend/clients/lancedb/config.py,sha256=Udd39FrYmIa9ZztmfAC0BLj0rBaPv3yd9XaF5VkCziU,2950
|
38
|
+
vectordb_bench/backend/clients/lancedb/lancedb.py,sha256=nylYVn7I2kLmKZACQoPf9CvGU-3g9vFNOkiLXZMBdjg,3018
|
36
39
|
vectordb_bench/backend/clients/mariadb/cli.py,sha256=nqV9V-gOSKGQ1y6VmxOMxGz0a3jz860Va55x7JBcuPk,2727
|
37
40
|
vectordb_bench/backend/clients/mariadb/config.py,sha256=DNxo0i1c0wIfii78Luv9GeOFq-74yvkkg3Np9sNUyFI,1870
|
38
41
|
vectordb_bench/backend/clients/mariadb/mariadb.py,sha256=O2PY7pP3dYdp-aTOQLDVckdNabCZscw5Xup7Z8LnWIg,7137
|
@@ -86,8 +89,8 @@ vectordb_bench/backend/runner/read_write_runner.py,sha256=CXYBXEEkS1S7-NurdzN5Wh
|
|
86
89
|
vectordb_bench/backend/runner/serial_runner.py,sha256=Y4Y2mSK8nU3hml7gliiF6BXUaW49sD-Ueci0xn62IL0,10290
|
87
90
|
vectordb_bench/backend/runner/util.py,sha256=tjTFUxth6hNnVrlU82TqkHhfeZo4ymj7WlyK4zFyPTg,522
|
88
91
|
vectordb_bench/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
89
|
-
vectordb_bench/cli/cli.py,sha256=
|
90
|
-
vectordb_bench/cli/vectordbbench.py,sha256=
|
92
|
+
vectordb_bench/cli/cli.py,sha256=Ik6n-4cIq_huHLucP6ug48Lc2uHlzaRmWQrsD9CRbPE,15568
|
93
|
+
vectordb_bench/cli/vectordbbench.py,sha256=5Ic2cVxn4aKx53N0pN7W9eguaJlC05jHd_SVRndfzHI,1516
|
91
94
|
vectordb_bench/config-files/sample_config.yml,sha256=yw9ZgHczNi9PedNuTVxZKiOTI6AVoQS1h8INNgoDjPk,340
|
92
95
|
vectordb_bench/custom/custom_case.json,sha256=uKo7NJgXDPPLtf_V6y1uc5w1aIcjLp-GCJEYOCty1As,475
|
93
96
|
vectordb_bench/frontend/utils.py,sha256=8eb4I9F0cQdnPQiFX0gMEk1e2fdgultgTKzzY5zS0Q0,489
|
@@ -116,9 +119,9 @@ vectordb_bench/frontend/components/run_test/hideSidebar.py,sha256=vb5kzIMmbMqWX6
|
|
116
119
|
vectordb_bench/frontend/components/run_test/initStyle.py,sha256=osPUgfFfH7rRlVNHSMumvmZxvKWlLxmZiNqgnMiUJEU,723
|
117
120
|
vectordb_bench/frontend/components/run_test/submitTask.py,sha256=VZjkopkCBNhqLwGqsoM0hbPEeF6Q5UOQcdFUaegerxc,4094
|
118
121
|
vectordb_bench/frontend/components/tables/data.py,sha256=5DdnC64BB7Aj2z9acht2atsPB4NabzQCZKALfIUnqtQ,1233
|
119
|
-
vectordb_bench/frontend/config/dbCaseConfigs.py,sha256=
|
122
|
+
vectordb_bench/frontend/config/dbCaseConfigs.py,sha256=DQrSuBVuTCjwS_I1hVNTnYygDu6Zkka7PLfLi7TNN3E,51023
|
120
123
|
vectordb_bench/frontend/config/dbPrices.py,sha256=10aBKjVcEg8y7TPSda28opmBM1KmXNrvbU9WM_BsZcE,176
|
121
|
-
vectordb_bench/frontend/config/styles.py,sha256=
|
124
|
+
vectordb_bench/frontend/config/styles.py,sha256=y-vYXCF4_o0-88BNzbKNKvfhvVxmz8BSr4v_E_Qv37E,2643
|
122
125
|
vectordb_bench/frontend/pages/concurrent.py,sha256=bvoSafRSIsRzBQkI3uBwwrdg8jnhRUQG-epZbrJhGiE,2082
|
123
126
|
vectordb_bench/frontend/pages/custom.py,sha256=j7oJ2FHBv5O50D7YbzXTLRuIDgwkGt0iEd0FRHHkYLw,2436
|
124
127
|
vectordb_bench/frontend/pages/quries_per_dollar.py,sha256=BDukiFwxyqQK_btCSsRR5D_a17PMu0yI8Muw3eRLz6Y,2461
|
@@ -142,9 +145,9 @@ vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json
|
|
142
145
|
vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json,sha256=-Mdm4By65XDRCrmVOCF8yQXjcZtH4Xo4shcjoDoBUKU,18293
|
143
146
|
vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json,sha256=77XlHT5zM_K7mG5HfDQKwXZnSCuR37VUbt6-P3J_amI,15737
|
144
147
|
vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json,sha256=TualfJ0664Hs-vdIW68bdkqAEYyzotXmu2P0yIN-GHk,42526
|
145
|
-
vectordb_bench-0.0.
|
146
|
-
vectordb_bench-0.0.
|
147
|
-
vectordb_bench-0.0.
|
148
|
-
vectordb_bench-0.0.
|
149
|
-
vectordb_bench-0.0.
|
150
|
-
vectordb_bench-0.0.
|
148
|
+
vectordb_bench-0.0.28.dist-info/licenses/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
|
149
|
+
vectordb_bench-0.0.28.dist-info/METADATA,sha256=wz_jhR3BceHrcdbARNJeXnWXN8YNevrINi5GsZ2G_f8,38159
|
150
|
+
vectordb_bench-0.0.28.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
151
|
+
vectordb_bench-0.0.28.dist-info/entry_points.txt,sha256=Qzw6gVx96ui8esG21H6yHsI6nboEohRmV424TYhQNrA,113
|
152
|
+
vectordb_bench-0.0.28.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
|
153
|
+
vectordb_bench-0.0.28.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|