vectordb-bench 0.0.30__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +14 -27
- vectordb_bench/backend/assembler.py +19 -6
- vectordb_bench/backend/cases.py +186 -23
- vectordb_bench/backend/clients/__init__.py +16 -0
- vectordb_bench/backend/clients/api.py +22 -1
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +82 -41
- vectordb_bench/backend/clients/aws_opensearch/config.py +23 -4
- vectordb_bench/backend/clients/chroma/chroma.py +6 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +19 -1
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +133 -45
- vectordb_bench/backend/clients/milvus/config.py +1 -0
- vectordb_bench/backend/clients/milvus/milvus.py +74 -22
- vectordb_bench/backend/clients/oceanbase/cli.py +100 -0
- vectordb_bench/backend/clients/oceanbase/config.py +125 -0
- vectordb_bench/backend/clients/oceanbase/oceanbase.py +215 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +39 -25
- vectordb_bench/backend/clients/qdrant_cloud/config.py +59 -3
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +100 -33
- vectordb_bench/backend/dataset.py +143 -27
- vectordb_bench/backend/filter.py +76 -0
- vectordb_bench/backend/runner/__init__.py +3 -3
- vectordb_bench/backend/runner/mp_runner.py +52 -39
- vectordb_bench/backend/runner/rate_runner.py +68 -52
- vectordb_bench/backend/runner/read_write_runner.py +125 -68
- vectordb_bench/backend/runner/serial_runner.py +56 -23
- vectordb_bench/backend/task_runner.py +48 -20
- vectordb_bench/cli/cli.py +59 -1
- vectordb_bench/cli/vectordbbench.py +3 -0
- vectordb_bench/frontend/components/check_results/data.py +16 -11
- vectordb_bench/frontend/components/check_results/filters.py +53 -25
- vectordb_bench/frontend/components/check_results/headerIcon.py +16 -13
- vectordb_bench/frontend/components/check_results/nav.py +20 -0
- vectordb_bench/frontend/components/custom/displayCustomCase.py +43 -8
- vectordb_bench/frontend/components/custom/displaypPrams.py +10 -5
- vectordb_bench/frontend/components/custom/getCustomConfig.py +10 -0
- vectordb_bench/frontend/components/label_filter/charts.py +60 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +48 -52
- vectordb_bench/frontend/components/run_test/dbSelector.py +9 -5
- vectordb_bench/frontend/components/run_test/inputWidget.py +48 -0
- vectordb_bench/frontend/components/run_test/submitTask.py +3 -1
- vectordb_bench/frontend/components/streaming/charts.py +253 -0
- vectordb_bench/frontend/components/streaming/data.py +62 -0
- vectordb_bench/frontend/components/tables/data.py +1 -1
- vectordb_bench/frontend/components/welcome/explainPrams.py +66 -0
- vectordb_bench/frontend/components/welcome/pagestyle.py +106 -0
- vectordb_bench/frontend/components/welcome/welcomePrams.py +147 -0
- vectordb_bench/frontend/config/dbCaseConfigs.py +307 -40
- vectordb_bench/frontend/config/styles.py +32 -2
- vectordb_bench/frontend/pages/concurrent.py +5 -1
- vectordb_bench/frontend/pages/custom.py +4 -0
- vectordb_bench/frontend/pages/label_filter.py +56 -0
- vectordb_bench/frontend/pages/quries_per_dollar.py +5 -1
- vectordb_bench/frontend/pages/results.py +60 -0
- vectordb_bench/frontend/pages/run_test.py +3 -3
- vectordb_bench/frontend/pages/streaming.py +135 -0
- vectordb_bench/frontend/pages/tables.py +4 -0
- vectordb_bench/frontend/vdb_benchmark.py +16 -41
- vectordb_bench/interface.py +6 -2
- vectordb_bench/metric.py +15 -1
- vectordb_bench/models.py +31 -11
- vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +5890 -0
- vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +6138 -0
- vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +7319 -0
- vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +2365 -0
- vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +3556 -0
- vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +6290 -0
- vectordb_bench/results/dbPrices.json +12 -4
- {vectordb_bench-0.0.30.dist-info → vectordb_bench-1.0.0.dist-info}/METADATA +85 -32
- {vectordb_bench-0.0.30.dist-info → vectordb_bench-1.0.0.dist-info}/RECORD +73 -56
- vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -791
- vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -679
- vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -1352
- {vectordb_bench-0.0.30.dist-info → vectordb_bench-1.0.0.dist-info}/WHEEL +0 -0
- {vectordb_bench-0.0.30.dist-info → vectordb_bench-1.0.0.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.30.dist-info → vectordb_bench-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {vectordb_bench-0.0.30.dist-info → vectordb_bench-1.0.0.dist-info}/top_level.txt +0 -0
@@ -4,6 +4,7 @@ from vectordb_bench.frontend.components.check_results.headerIcon import drawHead
|
|
4
4
|
from vectordb_bench.frontend.components.check_results.nav import (
|
5
5
|
NavToResults,
|
6
6
|
NavToRunTest,
|
7
|
+
NavToPages,
|
7
8
|
)
|
8
9
|
from vectordb_bench.frontend.components.check_results.filters import getshownData
|
9
10
|
from vectordb_bench.frontend.components.concurrent.charts import drawChartsByCase
|
@@ -25,6 +26,9 @@ def main():
|
|
25
26
|
# header
|
26
27
|
drawHeaderIcon(st)
|
27
28
|
|
29
|
+
# navigate
|
30
|
+
NavToPages(st)
|
31
|
+
|
28
32
|
allResults = benchmark_runner.get_results()
|
29
33
|
|
30
34
|
def check_conc_data(res: TestResult):
|
@@ -42,7 +46,7 @@ def main():
|
|
42
46
|
|
43
47
|
# results selector
|
44
48
|
resultSelectorContainer = st.sidebar.container()
|
45
|
-
shownData, _, showCaseNames = getshownData(
|
49
|
+
shownData, _, showCaseNames = getshownData(resultSelectorContainer, checkedResults)
|
46
50
|
|
47
51
|
resultSelectorContainer.divider()
|
48
52
|
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from functools import partial
|
2
2
|
import streamlit as st
|
3
3
|
from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
|
4
|
+
from vectordb_bench.frontend.components.check_results.nav import NavToPages
|
4
5
|
from vectordb_bench.frontend.components.custom.displayCustomCase import (
|
5
6
|
displayCustomCase,
|
6
7
|
)
|
@@ -49,6 +50,9 @@ def main():
|
|
49
50
|
# init style
|
50
51
|
initStyle(st)
|
51
52
|
|
53
|
+
# navigate
|
54
|
+
NavToPages(st)
|
55
|
+
|
52
56
|
st.title("Custom Dataset")
|
53
57
|
displayParams(st)
|
54
58
|
customCaseManager = CustomCaseManager()
|
@@ -0,0 +1,56 @@
|
|
1
|
+
import streamlit as st
|
2
|
+
from vectordb_bench.backend.filter import FilterOp
|
3
|
+
from vectordb_bench.frontend.components.check_results.footer import footer
|
4
|
+
from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
|
5
|
+
from vectordb_bench.frontend.components.check_results.nav import (
|
6
|
+
NavToQuriesPerDollar,
|
7
|
+
NavToRunTest,
|
8
|
+
NavToPages,
|
9
|
+
)
|
10
|
+
from vectordb_bench.frontend.components.label_filter.charts import drawCharts
|
11
|
+
from vectordb_bench.frontend.components.check_results.filters import getshownData
|
12
|
+
from vectordb_bench.frontend.config.styles import FAVICON
|
13
|
+
from vectordb_bench.interface import benchmark_runner
|
14
|
+
|
15
|
+
|
16
|
+
def main():
|
17
|
+
# set page config
|
18
|
+
st.set_page_config(
|
19
|
+
page_title="Label Filter",
|
20
|
+
page_icon=FAVICON,
|
21
|
+
layout="wide",
|
22
|
+
# initial_sidebar_state="collapsed",
|
23
|
+
)
|
24
|
+
|
25
|
+
# header
|
26
|
+
drawHeaderIcon(st)
|
27
|
+
|
28
|
+
# navigate
|
29
|
+
NavToPages(st)
|
30
|
+
|
31
|
+
allResults = benchmark_runner.get_results()
|
32
|
+
|
33
|
+
st.title("Vector Database Benchmark (Label Filter)")
|
34
|
+
|
35
|
+
# results selector and filter
|
36
|
+
resultSelectorContainer = st.sidebar.container()
|
37
|
+
shownData, failedTasks, showCaseNames = getshownData(
|
38
|
+
resultSelectorContainer, allResults, filter_type=FilterOp.StrEqual
|
39
|
+
)
|
40
|
+
|
41
|
+
resultSelectorContainer.divider()
|
42
|
+
|
43
|
+
# nav
|
44
|
+
navContainer = st.sidebar.container()
|
45
|
+
NavToRunTest(navContainer)
|
46
|
+
NavToQuriesPerDollar(navContainer)
|
47
|
+
|
48
|
+
# charts
|
49
|
+
drawCharts(st, shownData)
|
50
|
+
|
51
|
+
# footer
|
52
|
+
footer(st.container())
|
53
|
+
|
54
|
+
|
55
|
+
if __name__ == "__main__":
|
56
|
+
main()
|
@@ -9,6 +9,7 @@ from vectordb_bench.frontend.components.check_results.stPageConfig import (
|
|
9
9
|
)
|
10
10
|
from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
|
11
11
|
from vectordb_bench.frontend.components.check_results.nav import (
|
12
|
+
NavToPages,
|
12
13
|
NavToResults,
|
13
14
|
NavToRunTest,
|
14
15
|
)
|
@@ -27,13 +28,16 @@ def main():
|
|
27
28
|
# header
|
28
29
|
drawHeaderIcon(st)
|
29
30
|
|
31
|
+
# navigate
|
32
|
+
NavToPages(st)
|
33
|
+
|
30
34
|
allResults = benchmark_runner.get_results()
|
31
35
|
|
32
36
|
st.title("Vector DB Benchmark (QP$)")
|
33
37
|
|
34
38
|
# results selector
|
35
39
|
resultSelectorContainer = st.sidebar.container()
|
36
|
-
shownData, _, showCaseNames = getshownData(
|
40
|
+
shownData, _, showCaseNames = getshownData(resultSelectorContainer, allResults)
|
37
41
|
|
38
42
|
resultSelectorContainer.divider()
|
39
43
|
|
@@ -0,0 +1,60 @@
|
|
1
|
+
import streamlit as st
|
2
|
+
from vectordb_bench.frontend.components.check_results.footer import footer
|
3
|
+
from vectordb_bench.frontend.components.check_results.stPageConfig import (
|
4
|
+
initResultsPageConfig,
|
5
|
+
)
|
6
|
+
from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
|
7
|
+
from vectordb_bench.frontend.components.check_results.nav import (
|
8
|
+
NavToQuriesPerDollar,
|
9
|
+
NavToRunTest,
|
10
|
+
NavToPages,
|
11
|
+
)
|
12
|
+
from vectordb_bench.frontend.components.check_results.charts import drawCharts
|
13
|
+
from vectordb_bench.frontend.components.check_results.filters import getshownData
|
14
|
+
from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
|
15
|
+
|
16
|
+
from vectordb_bench.interface import benchmark_runner
|
17
|
+
|
18
|
+
|
19
|
+
def main():
|
20
|
+
# set page config
|
21
|
+
initResultsPageConfig(st)
|
22
|
+
|
23
|
+
# header
|
24
|
+
drawHeaderIcon(st)
|
25
|
+
|
26
|
+
# navigate
|
27
|
+
NavToPages(st)
|
28
|
+
|
29
|
+
allResults = benchmark_runner.get_results()
|
30
|
+
|
31
|
+
st.title("Vector Database Benchmark")
|
32
|
+
st.caption(
|
33
|
+
"Except for zillizcloud-v2024.1, which was tested in _January 2024_, all other tests were completed before _August 2023_."
|
34
|
+
)
|
35
|
+
st.caption("All tested milvus are in _standalone_ mode.")
|
36
|
+
|
37
|
+
# results selector and filter
|
38
|
+
resultSelectorContainer = st.sidebar.container()
|
39
|
+
shownData, failedTasks, showCaseNames = getshownData(resultSelectorContainer, allResults)
|
40
|
+
|
41
|
+
resultSelectorContainer.divider()
|
42
|
+
|
43
|
+
# nav
|
44
|
+
navContainer = st.sidebar.container()
|
45
|
+
NavToRunTest(navContainer)
|
46
|
+
NavToQuriesPerDollar(navContainer)
|
47
|
+
|
48
|
+
# save or share
|
49
|
+
resultesContainer = st.sidebar.container()
|
50
|
+
getResults(resultesContainer, "vectordb_bench")
|
51
|
+
|
52
|
+
# charts
|
53
|
+
drawCharts(st, shownData, failedTasks, showCaseNames)
|
54
|
+
|
55
|
+
# footer
|
56
|
+
footer(st.container())
|
57
|
+
|
58
|
+
|
59
|
+
if __name__ == "__main__":
|
60
|
+
main()
|
@@ -7,7 +7,7 @@ from vectordb_bench.frontend.components.run_test.generateTasks import generate_t
|
|
7
7
|
from vectordb_bench.frontend.components.run_test.hideSidebar import hideSidebar
|
8
8
|
from vectordb_bench.frontend.components.run_test.initStyle import initStyle
|
9
9
|
from vectordb_bench.frontend.components.run_test.submitTask import submitTask
|
10
|
-
from vectordb_bench.frontend.components.check_results.nav import NavToResults
|
10
|
+
from vectordb_bench.frontend.components.check_results.nav import NavToResults, NavToPages
|
11
11
|
from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
|
12
12
|
from vectordb_bench.frontend.components.check_results.stPageConfig import initRunTestPageConfig
|
13
13
|
|
@@ -25,8 +25,8 @@ def main():
|
|
25
25
|
# hide sidebar
|
26
26
|
hideSidebar(st)
|
27
27
|
|
28
|
-
#
|
29
|
-
|
28
|
+
# navigate
|
29
|
+
NavToPages(st)
|
30
30
|
|
31
31
|
# header
|
32
32
|
st.title("Run Your Test")
|
@@ -0,0 +1,135 @@
|
|
1
|
+
import logging
|
2
|
+
import streamlit as st
|
3
|
+
from vectordb_bench.backend.cases import CaseLabel
|
4
|
+
from vectordb_bench.frontend.components.check_results.footer import footer
|
5
|
+
from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
|
6
|
+
from vectordb_bench.frontend.components.check_results.nav import (
|
7
|
+
NavToResults,
|
8
|
+
NavToRunTest,
|
9
|
+
NavToPages,
|
10
|
+
)
|
11
|
+
from vectordb_bench.frontend.components.check_results.filters import getshownData
|
12
|
+
from vectordb_bench.frontend.components.streaming.charts import drawChartsByCase
|
13
|
+
from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
|
14
|
+
from vectordb_bench.frontend.components.streaming.data import DisplayedMetric
|
15
|
+
from vectordb_bench.frontend.config.styles import FAVICON
|
16
|
+
from vectordb_bench.interface import benchmark_runner
|
17
|
+
from vectordb_bench.models import CaseResult, TestResult
|
18
|
+
|
19
|
+
log = logging.getLogger("vectordb_bench")
|
20
|
+
|
21
|
+
|
22
|
+
def main():
|
23
|
+
# set page config
|
24
|
+
st.set_page_config(
|
25
|
+
page_title="VDBB Streaming Perf",
|
26
|
+
page_icon=FAVICON,
|
27
|
+
layout="wide",
|
28
|
+
# initial_sidebar_state="collapsed",
|
29
|
+
)
|
30
|
+
|
31
|
+
# header
|
32
|
+
drawHeaderIcon(st)
|
33
|
+
|
34
|
+
# navigate
|
35
|
+
NavToPages(st)
|
36
|
+
|
37
|
+
allResults = benchmark_runner.get_results()
|
38
|
+
|
39
|
+
def check_streaming_data(res: TestResult):
|
40
|
+
case_results = res.results
|
41
|
+
flag = False
|
42
|
+
for case_result in case_results:
|
43
|
+
if case_result.task_config.case_config.case.label == CaseLabel.Streaming:
|
44
|
+
flag = True
|
45
|
+
|
46
|
+
return flag
|
47
|
+
|
48
|
+
checkedResults = [res for res in allResults if check_streaming_data(res)]
|
49
|
+
|
50
|
+
st.title("VDBBench - Streaming Performance")
|
51
|
+
|
52
|
+
# results selector
|
53
|
+
resultSelectorContainer = st.sidebar.container()
|
54
|
+
|
55
|
+
def case_results_filter(case_result: CaseResult) -> bool:
|
56
|
+
return len(case_result.metrics.st_search_stage_list) > 0
|
57
|
+
|
58
|
+
shownData, _, showCaseNames = getshownData(
|
59
|
+
resultSelectorContainer, checkedResults, case_results_filter=case_results_filter
|
60
|
+
)
|
61
|
+
|
62
|
+
resultSelectorContainer.divider()
|
63
|
+
|
64
|
+
# nav
|
65
|
+
navContainer = st.sidebar.container()
|
66
|
+
NavToRunTest(navContainer)
|
67
|
+
NavToResults(navContainer)
|
68
|
+
|
69
|
+
# save or share
|
70
|
+
resultesContainer = st.sidebar.container()
|
71
|
+
getResults(resultesContainer, "vectordb_bench_streaming")
|
72
|
+
|
73
|
+
# # main
|
74
|
+
# latency_type = st.radio("Latency Type", options=["latency_p99", "latency_avg"])
|
75
|
+
st.markdown("Tests search performance with a **stable** and **fixed** insertion rate.")
|
76
|
+
control_panel = st.columns(3)
|
77
|
+
compared_with_optimized = control_panel[0].toggle(
|
78
|
+
"Compare with **optimezed** performance.",
|
79
|
+
value=True,
|
80
|
+
help="VectorDB is allowed to do **optimizations** after all insertions done and then test search performance.",
|
81
|
+
)
|
82
|
+
x_use_actual_time = control_panel[0].toggle(
|
83
|
+
"Use **actual time** as X-axis instead of search stage.",
|
84
|
+
value=False,
|
85
|
+
help="Since vdbbench inserts may be faster than vetordb can process them, the time it actually reaches search_stage may have different delays.",
|
86
|
+
)
|
87
|
+
accuracy_metric = DisplayedMetric.recall
|
88
|
+
show_ndcg = control_panel[1].toggle(
|
89
|
+
"Show **NDCG** instead of Recall.",
|
90
|
+
value=False,
|
91
|
+
help="A more appropriate indicator to measure ANN search accuracy than Recall.",
|
92
|
+
)
|
93
|
+
need_adjust = control_panel[1].toggle(
|
94
|
+
"Adjust the NDCG/Recall value based on the search stage.",
|
95
|
+
value=True,
|
96
|
+
help="NDCG/Recall is calculated using the ground truth file of the **entire** database, **divided by the search stage** to simulate the actual value.",
|
97
|
+
)
|
98
|
+
if show_ndcg:
|
99
|
+
if need_adjust:
|
100
|
+
accuracy_metric = DisplayedMetric.adjusted_ndcg
|
101
|
+
else:
|
102
|
+
accuracy_metric = DisplayedMetric.ndcg
|
103
|
+
else:
|
104
|
+
if need_adjust:
|
105
|
+
accuracy_metric = DisplayedMetric.adjusted_recall
|
106
|
+
line_chart_displayed_y_metrics: list[tuple[DisplayedMetric, str]] = [
|
107
|
+
(
|
108
|
+
DisplayedMetric.qps,
|
109
|
+
"max-qps of increasing **concurrency search** tests in each search stage.",
|
110
|
+
),
|
111
|
+
(accuracy_metric, "calculated in each search_stage."),
|
112
|
+
(
|
113
|
+
DisplayedMetric.latency_p99,
|
114
|
+
"serial lantency (p99) of **serial search** tests in each search stage.",
|
115
|
+
),
|
116
|
+
]
|
117
|
+
line_chart_displayed_x_metric = DisplayedMetric.search_stage
|
118
|
+
if x_use_actual_time:
|
119
|
+
line_chart_displayed_x_metric = DisplayedMetric.search_time
|
120
|
+
|
121
|
+
drawChartsByCase(
|
122
|
+
st.container(),
|
123
|
+
shownData,
|
124
|
+
showCaseNames,
|
125
|
+
with_last_optimized_data=compared_with_optimized,
|
126
|
+
line_chart_displayed_x_metric=line_chart_displayed_x_metric,
|
127
|
+
line_chart_displayed_y_metrics=line_chart_displayed_y_metrics,
|
128
|
+
)
|
129
|
+
|
130
|
+
# footer
|
131
|
+
footer(st.container())
|
132
|
+
|
133
|
+
|
134
|
+
if __name__ == "__main__":
|
135
|
+
main()
|
@@ -1,5 +1,6 @@
|
|
1
1
|
import streamlit as st
|
2
2
|
from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
|
3
|
+
from vectordb_bench.frontend.components.check_results.nav import NavToPages
|
3
4
|
from vectordb_bench.frontend.components.tables.data import getNewResults
|
4
5
|
from vectordb_bench.frontend.config.styles import FAVICON
|
5
6
|
|
@@ -16,6 +17,9 @@ def main():
|
|
16
17
|
# header
|
17
18
|
drawHeaderIcon(st)
|
18
19
|
|
20
|
+
# navigate
|
21
|
+
NavToPages(st)
|
22
|
+
|
19
23
|
df = getNewResults()
|
20
24
|
st.dataframe(df, height=800)
|
21
25
|
|
@@ -1,55 +1,30 @@
|
|
1
1
|
import streamlit as st
|
2
|
-
from vectordb_bench.frontend.components.check_results.footer import footer
|
3
|
-
from vectordb_bench.frontend.components.check_results.stPageConfig import (
|
4
|
-
initResultsPageConfig,
|
5
|
-
)
|
6
2
|
from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
|
7
|
-
from vectordb_bench.frontend.components.
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
from vectordb_bench.frontend.components.check_results.charts import drawCharts
|
12
|
-
from vectordb_bench.frontend.components.check_results.filters import getshownData
|
13
|
-
from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
|
14
|
-
|
15
|
-
from vectordb_bench.interface import benchmark_runner
|
3
|
+
from vectordb_bench.frontend.components.custom.initStyle import initStyle
|
4
|
+
from vectordb_bench.frontend.components.welcome.explainPrams import explainPrams
|
5
|
+
from vectordb_bench.frontend.components.welcome.welcomePrams import welcomePrams
|
6
|
+
from vectordb_bench.frontend.config.styles import FAVICON, PAGE_TITLE
|
16
7
|
|
17
8
|
|
18
9
|
def main():
|
19
|
-
|
20
|
-
|
10
|
+
st.set_page_config(
|
11
|
+
page_title=PAGE_TITLE,
|
12
|
+
page_icon=FAVICON,
|
13
|
+
layout="wide",
|
14
|
+
initial_sidebar_state="collapsed",
|
15
|
+
)
|
21
16
|
|
22
17
|
# header
|
23
18
|
drawHeaderIcon(st)
|
24
19
|
|
25
|
-
|
26
|
-
|
27
|
-
st.title("Vector Database Benchmark")
|
28
|
-
st.caption(
|
29
|
-
"Except for zillizcloud-v2024.1, which was tested in _January 2024_, all other tests were completed before _August 2023_."
|
30
|
-
)
|
31
|
-
st.caption("All tested milvus are in _standalone_ mode.")
|
32
|
-
|
33
|
-
# results selector and filter
|
34
|
-
resultSelectorContainer = st.sidebar.container()
|
35
|
-
shownData, failedTasks, showCaseNames = getshownData(allResults, resultSelectorContainer)
|
36
|
-
|
37
|
-
resultSelectorContainer.divider()
|
38
|
-
|
39
|
-
# nav
|
40
|
-
navContainer = st.sidebar.container()
|
41
|
-
NavToRunTest(navContainer)
|
42
|
-
NavToQuriesPerDollar(navContainer)
|
43
|
-
|
44
|
-
# save or share
|
45
|
-
resultesContainer = st.sidebar.container()
|
46
|
-
getResults(resultesContainer, "vectordb_bench")
|
20
|
+
# init style
|
21
|
+
initStyle(st)
|
47
22
|
|
48
|
-
#
|
49
|
-
|
23
|
+
# page
|
24
|
+
welcomePrams(st)
|
50
25
|
|
51
|
-
#
|
52
|
-
|
26
|
+
# description
|
27
|
+
explainPrams(st)
|
53
28
|
|
54
29
|
|
55
30
|
if __name__ == "__main__":
|
vectordb_bench/interface.py
CHANGED
@@ -12,7 +12,7 @@ from multiprocessing.connection import Connection
|
|
12
12
|
import psutil
|
13
13
|
|
14
14
|
from . import config
|
15
|
-
from .backend.assembler import Assembler
|
15
|
+
from .backend.assembler import Assembler, FilterNotSupportedError
|
16
16
|
from .backend.data_source import DatasetSource
|
17
17
|
from .backend.result_collector import ResultCollector
|
18
18
|
from .backend.task_runner import TaskRunner
|
@@ -88,6 +88,10 @@ class BenchMarkRunner:
|
|
88
88
|
log.warning(msg)
|
89
89
|
self.latest_error = msg
|
90
90
|
return True
|
91
|
+
except FilterNotSupportedError as e:
|
92
|
+
log.warning(e.args[0])
|
93
|
+
self.latest_error = e.args[0]
|
94
|
+
return True
|
91
95
|
|
92
96
|
return self._run_async(send_conn)
|
93
97
|
|
@@ -97,7 +101,7 @@ class BenchMarkRunner:
|
|
97
101
|
return ResultCollector.collect(target_dir)
|
98
102
|
|
99
103
|
def _try_get_signal(self):
|
100
|
-
|
104
|
+
while self.receive_conn and self.receive_conn.poll():
|
101
105
|
sig, received = self.receive_conn.recv()
|
102
106
|
log.debug(f"Sigal received to process: {sig}, {received}")
|
103
107
|
if sig == SIGNAL.ERROR:
|
vectordb_bench/metric.py
CHANGED
@@ -13,8 +13,12 @@ class Metric:
|
|
13
13
|
# for load cases
|
14
14
|
max_load_count: int = 0
|
15
15
|
|
16
|
+
# for both performace and streaming cases
|
17
|
+
insert_duration: float = 0.0
|
18
|
+
optimize_duration: float = 0.0
|
19
|
+
load_duration: float = 0.0 # insert + optimize
|
20
|
+
|
16
21
|
# for performance cases
|
17
|
-
load_duration: float = 0.0 # duration to load all dataset into DB
|
18
22
|
qps: float = 0.0
|
19
23
|
serial_latency_p99: float = 0.0
|
20
24
|
recall: float = 0.0
|
@@ -24,6 +28,16 @@ class Metric:
|
|
24
28
|
conc_latency_p99_list: list[float] = field(default_factory=list)
|
25
29
|
conc_latency_avg_list: list[float] = field(default_factory=list)
|
26
30
|
|
31
|
+
# for streaming cases
|
32
|
+
st_ideal_insert_duration: int = 0
|
33
|
+
st_search_stage_list: list[int] = field(default_factory=list)
|
34
|
+
st_search_time_list: list[float] = field(default_factory=list)
|
35
|
+
st_max_qps_list_list: list[float] = field(default_factory=list)
|
36
|
+
st_recall_list: list[float] = field(default_factory=list)
|
37
|
+
st_ndcg_list: list[float] = field(default_factory=list)
|
38
|
+
st_serial_latency_p99_list: list[float] = field(default_factory=list)
|
39
|
+
st_conc_failed_rate_list: list[float] = field(default_factory=list)
|
40
|
+
|
27
41
|
|
28
42
|
QURIES_PER_DOLLAR_METRIC = "QP$ (Quries per Dollar)"
|
29
43
|
LOAD_DURATION_METRIC = "load_duration"
|
vectordb_bench/models.py
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
import logging
|
2
2
|
import pathlib
|
3
3
|
from datetime import date, datetime
|
4
|
-
from enum import Enum, StrEnum
|
4
|
+
from enum import Enum, StrEnum
|
5
5
|
from typing import Self
|
6
6
|
|
7
7
|
import ujson
|
8
8
|
|
9
9
|
from . import config
|
10
|
-
from .backend.cases import CaseType
|
10
|
+
from .backend.cases import Case, CaseType
|
11
11
|
from .backend.clients import (
|
12
12
|
DB,
|
13
13
|
DBCaseConfig,
|
@@ -112,10 +112,20 @@ class CaseConfigParamType(Enum):
|
|
112
112
|
index_thread_qty = "index_thread_qty"
|
113
113
|
engine_name = "engine_name"
|
114
114
|
metric_type_name = "metric_type_name"
|
115
|
-
|
116
|
-
# mongodb params
|
117
115
|
mongodb_quantization_type = "quantization"
|
118
116
|
mongodb_num_candidates_ratio = "num_candidates_ratio"
|
117
|
+
use_partition_key = "use_partition_key"
|
118
|
+
refresh_interval = "refresh_interval"
|
119
|
+
use_rescore = "use_rescore"
|
120
|
+
oversample_ratio = "oversample_ratio"
|
121
|
+
use_routing = "use_routing"
|
122
|
+
|
123
|
+
dataset_with_size_type = "dataset_with_size_type"
|
124
|
+
insert_rate = "insert_rate"
|
125
|
+
search_stages = "search_stages"
|
126
|
+
concurrencies = "concurrencies"
|
127
|
+
optimize_after_write = "optimize_after_write"
|
128
|
+
read_dur_after_write = "read_dur_after_write"
|
119
129
|
|
120
130
|
|
121
131
|
class CustomizedCase(BaseModel):
|
@@ -151,14 +161,22 @@ class CaseConfig(BaseModel):
|
|
151
161
|
def __hash__(self) -> int:
|
152
162
|
return hash(self.json())
|
153
163
|
|
164
|
+
@property
|
165
|
+
def case(self) -> Case:
|
166
|
+
return self.case_id.case_cls(self.custom_case)
|
167
|
+
|
168
|
+
@property
|
169
|
+
def case_name(self) -> str:
|
170
|
+
return self.case.name
|
171
|
+
|
154
172
|
|
155
173
|
class TaskStage(StrEnum):
|
156
174
|
"""Enumerations of various stages of the task"""
|
157
175
|
|
158
|
-
DROP_OLD =
|
159
|
-
LOAD =
|
160
|
-
SEARCH_SERIAL =
|
161
|
-
SEARCH_CONCURRENT =
|
176
|
+
DROP_OLD = "drop_old"
|
177
|
+
LOAD = "load"
|
178
|
+
SEARCH_SERIAL = "search_serial"
|
179
|
+
SEARCH_CONCURRENT = "search_concurrent"
|
162
180
|
|
163
181
|
def __repr__(self) -> str:
|
164
182
|
return str.__repr__(self.value)
|
@@ -299,12 +317,14 @@ class TestResult(BaseModel):
|
|
299
317
|
key=lambda x: (
|
300
318
|
x.task_config.db.name,
|
301
319
|
x.task_config.db_config.db_label,
|
302
|
-
x.task_config.case_config.
|
320
|
+
x.task_config.case_config.case_name,
|
303
321
|
),
|
304
322
|
reverse=True,
|
305
323
|
)
|
306
324
|
|
307
325
|
filtered_results = [r for r in sorted_results if not filter_list or r.task_config.db not in filter_list]
|
326
|
+
if len(filtered_results) == 0:
|
327
|
+
return
|
308
328
|
|
309
329
|
def append_return(x: any, y: any):
|
310
330
|
x.append(y)
|
@@ -312,7 +332,7 @@ class TestResult(BaseModel):
|
|
312
332
|
|
313
333
|
max_db = max(map(len, [f.task_config.db.name for f in filtered_results]))
|
314
334
|
max_db_labels = max(map(len, [f.task_config.db_config.db_label for f in filtered_results])) + 3
|
315
|
-
max_case = max(map(len, [f.task_config.case_config.
|
335
|
+
max_case = max(map(len, [f.task_config.case_config.case_name for f in filtered_results]))
|
316
336
|
max_load_dur = max(map(len, [str(f.metrics.load_duration) for f in filtered_results])) + 3
|
317
337
|
max_qps = max(map(len, [str(f.metrics.qps) for f in filtered_results])) + 3
|
318
338
|
max_recall = max(map(len, [str(f.metrics.recall) for f in filtered_results])) + 3
|
@@ -366,7 +386,7 @@ class TestResult(BaseModel):
|
|
366
386
|
% (
|
367
387
|
f.task_config.db.name,
|
368
388
|
f.task_config.db_config.db_label,
|
369
|
-
f.task_config.case_config.
|
389
|
+
f.task_config.case_config.case_name,
|
370
390
|
self.task_label,
|
371
391
|
f.metrics.load_duration,
|
372
392
|
f.metrics.qps,
|