vectordb-bench 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__main__.py +1 -1
- vectordb_bench/backend/clients/api.py +1 -1
- vectordb_bench/backend/clients/aws_opensearch/config.py +14 -0
- vectordb_bench/backend/clients/elastic_cloud/config.py +12 -0
- vectordb_bench/backend/clients/milvus/milvus.py +1 -1
- vectordb_bench/backend/clients/qdrant_cloud/config.py +14 -0
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/dataset.py +3 -0
- vectordb_bench/backend/task_runner.py +11 -0
- vectordb_bench/frontend/components/check_results/headerIcon.py +4 -2
- vectordb_bench/frontend/components/welcome/explainPrams.py +8 -8
- vectordb_bench/frontend/components/welcome/welcomePrams.py +1 -1
- vectordb_bench/frontend/config/dbCaseConfigs.py +3 -3
- vectordb_bench/frontend/config/styles.py +2 -2
- vectordb_bench/frontend/pages/results.py +5 -3
- vectordb_bench/interface.py +2 -1
- vectordb_bench/results/getLeaderboardDataV2.py +59 -0
- vectordb_bench/results/leaderboard_v2.json +2662 -0
- {vectordb_bench-1.0.0.dist-info → vectordb_bench-1.0.1.dist-info}/METADATA +10 -10
- {vectordb_bench-1.0.0.dist-info → vectordb_bench-1.0.1.dist-info}/RECORD +25 -23
- /vectordb_bench/frontend/{vdb_benchmark.py → vdbbench.py} +0 -0
- {vectordb_bench-1.0.0.dist-info → vectordb_bench-1.0.1.dist-info}/WHEEL +0 -0
- {vectordb_bench-1.0.0.dist-info → vectordb_bench-1.0.1.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-1.0.0.dist-info → vectordb_bench-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {vectordb_bench-1.0.0.dist-info → vectordb_bench-1.0.1.dist-info}/top_level.txt +0 -0
vectordb_bench/__main__.py
CHANGED
@@ -132,7 +132,7 @@ class VectorDB(ABC):
|
|
132
132
|
"""
|
133
133
|
|
134
134
|
"The filtering types supported by the VectorDB Client, default only non-filter"
|
135
|
-
supported_filter_types: list[FilterOp] = [FilterOp.NonFilter
|
135
|
+
supported_filter_types: list[FilterOp] = [FilterOp.NonFilter]
|
136
136
|
|
137
137
|
@classmethod
|
138
138
|
def filter_supported(cls, filters: Filter) -> bool:
|
@@ -75,6 +75,20 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
|
|
75
75
|
and self.quantization_type == obj.quantization_type
|
76
76
|
)
|
77
77
|
|
78
|
+
def __hash__(self) -> int:
|
79
|
+
return hash(
|
80
|
+
(
|
81
|
+
self.engine,
|
82
|
+
self.M,
|
83
|
+
self.efConstruction,
|
84
|
+
self.number_of_shards,
|
85
|
+
self.number_of_replicas,
|
86
|
+
self.number_of_segments,
|
87
|
+
self.use_routing,
|
88
|
+
self.quantization_type,
|
89
|
+
)
|
90
|
+
)
|
91
|
+
|
78
92
|
def parse_metric(self) -> str:
|
79
93
|
log.info(f"User specified metric_type: {self.metric_type_name}")
|
80
94
|
self.metric_type = MetricType[self.metric_type_name.upper()]
|
@@ -48,6 +48,18 @@ class ElasticCloudIndexConfig(BaseModel, DBCaseConfig):
|
|
48
48
|
and self.M == obj.M
|
49
49
|
)
|
50
50
|
|
51
|
+
def __hash__(self) -> int:
|
52
|
+
return hash(
|
53
|
+
(
|
54
|
+
self.index,
|
55
|
+
self.number_of_shards,
|
56
|
+
self.number_of_replicas,
|
57
|
+
self.use_routing,
|
58
|
+
self.efConstruction,
|
59
|
+
self.M,
|
60
|
+
)
|
61
|
+
)
|
62
|
+
|
51
63
|
def parse_metric(self) -> str:
|
52
64
|
if self.metric_type == MetricType.L2:
|
53
65
|
return "l2_norm"
|
@@ -29,7 +29,7 @@ class Milvus(VectorDB):
|
|
29
29
|
dim: int,
|
30
30
|
db_config: dict,
|
31
31
|
db_case_config: MilvusIndexConfig,
|
32
|
-
collection_name: str = "
|
32
|
+
collection_name: str = "VDBBench",
|
33
33
|
drop_old: bool = False,
|
34
34
|
name: str = "Milvus",
|
35
35
|
with_scalar_labels: bool = False,
|
@@ -63,6 +63,20 @@ class QdrantIndexConfig(BaseModel, DBCaseConfig):
|
|
63
63
|
and self.default_segment_number == obj.default_segment_number
|
64
64
|
)
|
65
65
|
|
66
|
+
def __hash__(self) -> int:
|
67
|
+
return hash(
|
68
|
+
(
|
69
|
+
self.m,
|
70
|
+
self.payload_m,
|
71
|
+
self.create_payload_int_index,
|
72
|
+
self.create_payload_keyword_index,
|
73
|
+
self.is_tenant,
|
74
|
+
self.use_scalar_quant,
|
75
|
+
self.sq_quantile,
|
76
|
+
self.default_segment_number,
|
77
|
+
)
|
78
|
+
)
|
79
|
+
|
66
80
|
def parse_metric(self) -> str:
|
67
81
|
if self.metric_type == MetricType.L2:
|
68
82
|
return "Euclid"
|
@@ -10,7 +10,7 @@ class ZillizCloud(Milvus):
|
|
10
10
|
dim: int,
|
11
11
|
db_config: dict,
|
12
12
|
db_case_config: DBCaseConfig,
|
13
|
-
collection_name: str = "
|
13
|
+
collection_name: str = "ZillizCloudVDBBench",
|
14
14
|
drop_old: bool = False,
|
15
15
|
name: str = "ZillizCloud",
|
16
16
|
**kwargs,
|
@@ -242,6 +242,9 @@ class DatasetManager(BaseModel):
|
|
242
242
|
return self.data.name == obj.data.name and self.data.label == obj.data.label
|
243
243
|
return False
|
244
244
|
|
245
|
+
def __hash__(self) -> int:
|
246
|
+
return hash((self.data.name, self.data.label))
|
247
|
+
|
245
248
|
def set_reader(self, reader: DatasetReader):
|
246
249
|
self.reader = reader
|
247
250
|
|
@@ -59,6 +59,17 @@ class CaseRunner(BaseModel):
|
|
59
59
|
)
|
60
60
|
return False
|
61
61
|
|
62
|
+
def __hash__(self) -> int:
|
63
|
+
"""Hash method to maintain consistency with __eq__ method."""
|
64
|
+
return hash(
|
65
|
+
(
|
66
|
+
self.ca.label,
|
67
|
+
self.config.db,
|
68
|
+
self.config.db_case_config,
|
69
|
+
self.ca.dataset,
|
70
|
+
)
|
71
|
+
)
|
72
|
+
|
62
73
|
def display(self) -> dict:
|
63
74
|
c_dict = self.ca.dict(
|
64
75
|
include={
|
@@ -4,7 +4,7 @@ from vectordb_bench.frontend.config.styles import HEADER_ICON
|
|
4
4
|
def drawHeaderIcon(st):
|
5
5
|
st.markdown(
|
6
6
|
f"""
|
7
|
-
<a href="/
|
7
|
+
<a href="/vdbbench" target="_self">
|
8
8
|
<div class="headerIconContainer"></div>
|
9
9
|
</a>
|
10
10
|
|
@@ -16,8 +16,10 @@ def drawHeaderIcon(st):
|
|
16
16
|
width: 100%;
|
17
17
|
border-bottom: 2px solid #E8EAEE;
|
18
18
|
background-image: url({HEADER_ICON});
|
19
|
+
background-size: contain;
|
20
|
+
background-position: left top;
|
19
21
|
background-repeat: no-repeat;
|
20
|
-
cursor: pointer;
|
22
|
+
cursor: pointer;
|
21
23
|
}}
|
22
24
|
</style>
|
23
25
|
""",
|
@@ -1,16 +1,16 @@
|
|
1
1
|
def explainPrams(st):
|
2
2
|
st.markdown("## descriptions")
|
3
|
-
st.markdown("### 1.Overview")
|
3
|
+
st.markdown("### 1. Overview")
|
4
4
|
st.markdown(
|
5
5
|
"""
|
6
|
-
- **VectorDBBench** is an open-source benchmarking tool designed specifically for vector databases. Its main features include:
|
6
|
+
- **VectorDBBench(VDBBench)** is an open-source benchmarking tool designed specifically for vector databases. Its main features include:
|
7
7
|
- (1) An easy-to-use **web UI** for configuration of tests and visual analysis of results.
|
8
8
|
- (2) A comprehensive set of **standards for testing and metric collection**.
|
9
9
|
- (3) Support for **various scenarios**, including additional support for **Filter** and **Streaming** based on standard tests.
|
10
|
-
-
|
10
|
+
- VDBBench embraces open-source and welcome contributions of code and test result submissions. The testing process and extended scenarios of VDBBench, as well as the intention behind our design will be introduced as follows.
|
11
11
|
"""
|
12
12
|
)
|
13
|
-
st.markdown("### 2.Dataset")
|
13
|
+
st.markdown("### 2. Dataset")
|
14
14
|
st.markdown(
|
15
15
|
"""
|
16
16
|
- We provide two embedding datasets:
|
@@ -19,7 +19,7 @@ def explainPrams(st):
|
|
19
19
|
- (3)*OpenAI 1536dim*, generated using the **OpenAI** model based on the [C4 corpus](https://huggingface.co/datasets/legacy-datasets/c4).
|
20
20
|
"""
|
21
21
|
)
|
22
|
-
st.markdown("### 3.Standard Test")
|
22
|
+
st.markdown("### 3. Standard Test")
|
23
23
|
st.markdown(
|
24
24
|
"""
|
25
25
|
The test is actually divided into 3 sub-processes
|
@@ -42,19 +42,19 @@ The test is actually divided into 3 sub-processes
|
|
42
42
|
""",
|
43
43
|
unsafe_allow_html=True,
|
44
44
|
)
|
45
|
-
st.markdown("### 4.Filter Search Test")
|
45
|
+
st.markdown("### 4. Filter Search Test")
|
46
46
|
st.markdown(
|
47
47
|
"""
|
48
48
|
- Compared to the Standard Test, the **Filter Search** introduces additional scalar constraints (e.g. **color == red**) during the Search Test. Different **filter_ratios** present varying levels of challenge to the VectorDB's search performance.
|
49
49
|
- We provide an additional **string column** containing 10 labels with different distribution ratios (50%,20%,10%,5%,2%,1%,0.5%,0.2%,0.1%). For each label, we conduct both a **Serial Test** and a **Concurrency Test** to observe the VectorDB's performance in terms of **QPS, latency, and recall** under different filtering conditions.
|
50
50
|
"""
|
51
51
|
)
|
52
|
-
st.markdown("### 5.Streaming Search Test")
|
52
|
+
st.markdown("### 5. Streaming Search Test")
|
53
53
|
st.markdown(
|
54
54
|
"""
|
55
55
|
Different from Standard's load and search separation, Streaming Search Test primarily focuses on **search performance during the insertion process**.
|
56
56
|
Different **base dataset sizes** and varying **insertion rates** set distinct challenges to the VectorDB's search capabilities.
|
57
|
-
|
57
|
+
VDBBench will send insert requests at a **fixed rate**, maintaining consistent insertion pressure. The search test consists of three steps as follows:
|
58
58
|
- 1.**Streaming Search**
|
59
59
|
- Users can configure **multiple search stages**. When the inserted data volume reaches a specified stage, a **Serial Test** and a **Concurrent Test** will be conducted, recording qps, latency, and recall performance.
|
60
60
|
- 2.**Streaming Final Search**
|
@@ -520,7 +520,7 @@ CaseConfigParamInput_M = CaseConfigInput(
|
|
520
520
|
inputConfig={
|
521
521
|
"min": 4,
|
522
522
|
"max": 64,
|
523
|
-
"value":
|
523
|
+
"value": 16,
|
524
524
|
},
|
525
525
|
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
526
526
|
in [
|
@@ -550,7 +550,7 @@ CaseConfigParamInput_EFConstruction_Milvus = CaseConfigInput(
|
|
550
550
|
inputConfig={
|
551
551
|
"min": 8,
|
552
552
|
"max": 512,
|
553
|
-
"value":
|
553
|
+
"value": 256,
|
554
554
|
},
|
555
555
|
isDisplayed=lambda config: config[CaseConfigParamType.IndexType]
|
556
556
|
in [
|
@@ -1441,7 +1441,7 @@ CaseConfigParamInput_Milvus_use_partition_key = CaseConfigInput(
|
|
1441
1441
|
label=CaseConfigParamType.use_partition_key,
|
1442
1442
|
inputType=InputType.Option,
|
1443
1443
|
inputHelp="whether to use partition_key for label-filter cases. only works in label-filter cases",
|
1444
|
-
inputConfig={"options": [
|
1444
|
+
inputConfig={"options": [False, True]},
|
1445
1445
|
)
|
1446
1446
|
|
1447
1447
|
|
@@ -33,8 +33,8 @@ MAX_AUTO_REFRESH_COUNT = 999999
|
|
33
33
|
MAX_AUTO_REFRESH_INTERVAL = 5000 # 5s
|
34
34
|
|
35
35
|
PAGE_TITLE = "VectorDB Benchmark"
|
36
|
-
FAVICON = "https://assets.zilliz.com/
|
37
|
-
HEADER_ICON = "https://assets.zilliz.com/
|
36
|
+
FAVICON = "https://assets.zilliz.com/VDB_Bench_icon_d3276bedc4.png"
|
37
|
+
HEADER_ICON = "https://assets.zilliz.com/VDB_Bench_text_icon_6c5f52a458.png"
|
38
38
|
|
39
39
|
# RedisCloud icon: https://assets.zilliz.com/Redis_Cloud_74b8bfef39.png
|
40
40
|
# Elasticsearch icon: https://assets.zilliz.com/elasticsearch_beffeadc29.png
|
@@ -30,10 +30,12 @@ def main():
|
|
30
30
|
|
31
31
|
st.title("Vector Database Benchmark")
|
32
32
|
st.caption(
|
33
|
-
"
|
33
|
+
"Choose your desired test results to display from the sidebar. "
|
34
|
+
"For your reference, we've included two standard benchmarks tested by our team. "
|
35
|
+
"Note that `standard_2025` was tested in 2025; the others in 2023. "
|
36
|
+
"Unless explicitly labeled as distributed multi-node, test with single-node mode by default."
|
34
37
|
)
|
35
|
-
st.caption("
|
36
|
-
|
38
|
+
st.caption("We welcome community contributions for better results, parameter configurations, and optimizations.")
|
37
39
|
# results selector and filter
|
38
40
|
resultSelectorContainer = st.sidebar.container()
|
39
41
|
shownData, failedTasks, showCaseNames = getshownData(resultSelectorContainer, allResults)
|
vectordb_bench/interface.py
CHANGED
@@ -95,7 +95,8 @@ class BenchMarkRunner:
|
|
95
95
|
|
96
96
|
return self._run_async(send_conn)
|
97
97
|
|
98
|
-
|
98
|
+
@staticmethod
|
99
|
+
def get_results(result_dir: pathlib.Path | None = None) -> list[TestResult]:
|
99
100
|
"""results of all runs, each TestResult represents one run."""
|
100
101
|
target_dir = result_dir if result_dir else config.RESULTS_LOCAL_DIR
|
101
102
|
return ResultCollector.collect(target_dir)
|
@@ -0,0 +1,59 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
|
4
|
+
|
5
|
+
from vectordb_bench.backend.cases import CaseType
|
6
|
+
from vectordb_bench.backend.clients import DB
|
7
|
+
from vectordb_bench.models import CaseResult
|
8
|
+
from vectordb_bench import config
|
9
|
+
|
10
|
+
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
|
11
|
+
|
12
|
+
from vectordb_bench.interface import BenchMarkRunner
|
13
|
+
|
14
|
+
|
15
|
+
def get_standard_2025_results() -> list[CaseResult]:
|
16
|
+
all_results = BenchMarkRunner.get_results()
|
17
|
+
standard_2025_case_results = []
|
18
|
+
for result in all_results:
|
19
|
+
if result.task_label == "standard_2025":
|
20
|
+
standard_2025_case_results += result.results
|
21
|
+
return standard_2025_case_results
|
22
|
+
|
23
|
+
|
24
|
+
def save_to_json(data: list[dict], file_name: str):
|
25
|
+
with open(file_name, "w") as f:
|
26
|
+
json.dump(data, f, indent=4)
|
27
|
+
|
28
|
+
|
29
|
+
def main():
|
30
|
+
standard_2025_case_results = get_standard_2025_results()
|
31
|
+
data = []
|
32
|
+
for case_result in standard_2025_case_results:
|
33
|
+
db = case_result.task_config.db
|
34
|
+
label = case_result.task_config.db_config.db_label
|
35
|
+
metrics = case_result.metrics
|
36
|
+
qps = metrics.qps
|
37
|
+
latency = metrics.serial_latency_p99
|
38
|
+
recall = metrics.recall
|
39
|
+
case = case_result.task_config.case_config.case
|
40
|
+
filter_ratio = case.filters.filter_rate
|
41
|
+
dataset = case.dataset.data.full_name
|
42
|
+
if case.case_id != CaseType.StreamingPerformanceCase:
|
43
|
+
data.append(
|
44
|
+
{
|
45
|
+
"dataset": dataset,
|
46
|
+
"db": db.value,
|
47
|
+
"label": label,
|
48
|
+
"db_name": f"{db.value}-{label}",
|
49
|
+
"qps": round(qps, 4),
|
50
|
+
"latency": round(latency, 4),
|
51
|
+
"recall": round(recall, 4),
|
52
|
+
"filter_ratio": round(filter_ratio, 2),
|
53
|
+
}
|
54
|
+
)
|
55
|
+
save_to_json(data, config.RESULTS_LOCAL_DIR / "leaderboard_v2.json")
|
56
|
+
|
57
|
+
|
58
|
+
if __name__ == "__main__":
|
59
|
+
main()
|