vectordb-bench 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- vectordb_bench/__init__.py +19 -5
- vectordb_bench/backend/assembler.py +1 -1
- vectordb_bench/backend/cases.py +93 -27
- vectordb_bench/backend/clients/__init__.py +14 -0
- vectordb_bench/backend/clients/api.py +1 -1
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +159 -0
- vectordb_bench/backend/clients/aws_opensearch/cli.py +44 -0
- vectordb_bench/backend/clients/aws_opensearch/config.py +58 -0
- vectordb_bench/backend/clients/aws_opensearch/run.py +125 -0
- vectordb_bench/backend/clients/milvus/cli.py +291 -0
- vectordb_bench/backend/clients/milvus/milvus.py +13 -6
- vectordb_bench/backend/clients/pgvector/cli.py +116 -0
- vectordb_bench/backend/clients/pgvector/config.py +1 -1
- vectordb_bench/backend/clients/pgvector/pgvector.py +7 -4
- vectordb_bench/backend/clients/redis/cli.py +74 -0
- vectordb_bench/backend/clients/test/cli.py +25 -0
- vectordb_bench/backend/clients/test/config.py +18 -0
- vectordb_bench/backend/clients/test/test.py +62 -0
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +41 -0
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +55 -0
- vectordb_bench/backend/dataset.py +27 -5
- vectordb_bench/backend/runner/mp_runner.py +14 -3
- vectordb_bench/backend/runner/serial_runner.py +7 -3
- vectordb_bench/backend/task_runner.py +76 -26
- vectordb_bench/cli/__init__.py +0 -0
- vectordb_bench/cli/cli.py +362 -0
- vectordb_bench/cli/vectordbbench.py +22 -0
- vectordb_bench/config-files/sample_config.yml +17 -0
- vectordb_bench/custom/custom_case.json +18 -0
- vectordb_bench/frontend/components/check_results/charts.py +6 -6
- vectordb_bench/frontend/components/check_results/data.py +23 -20
- vectordb_bench/frontend/components/check_results/expanderStyle.py +1 -1
- vectordb_bench/frontend/components/check_results/filters.py +20 -13
- vectordb_bench/frontend/components/check_results/headerIcon.py +1 -1
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -1
- vectordb_bench/frontend/components/check_results/stPageConfig.py +1 -1
- vectordb_bench/frontend/components/concurrent/charts.py +79 -0
- vectordb_bench/frontend/components/custom/displayCustomCase.py +31 -0
- vectordb_bench/frontend/components/custom/displaypPrams.py +11 -0
- vectordb_bench/frontend/components/custom/getCustomConfig.py +40 -0
- vectordb_bench/frontend/components/custom/initStyle.py +15 -0
- vectordb_bench/frontend/components/run_test/autoRefresh.py +1 -1
- vectordb_bench/frontend/components/run_test/caseSelector.py +40 -28
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -5
- vectordb_bench/frontend/components/run_test/dbSelector.py +8 -14
- vectordb_bench/frontend/components/run_test/generateTasks.py +3 -5
- vectordb_bench/frontend/components/run_test/initStyle.py +14 -0
- vectordb_bench/frontend/components/run_test/submitTask.py +13 -5
- vectordb_bench/frontend/components/tables/data.py +44 -0
- vectordb_bench/frontend/{const → config}/dbCaseConfigs.py +140 -32
- vectordb_bench/frontend/{const → config}/styles.py +2 -0
- vectordb_bench/frontend/pages/concurrent.py +65 -0
- vectordb_bench/frontend/pages/custom.py +64 -0
- vectordb_bench/frontend/pages/quries_per_dollar.py +5 -5
- vectordb_bench/frontend/pages/run_test.py +4 -0
- vectordb_bench/frontend/pages/tables.py +24 -0
- vectordb_bench/frontend/utils.py +17 -1
- vectordb_bench/frontend/vdb_benchmark.py +3 -3
- vectordb_bench/interface.py +21 -25
- vectordb_bench/metric.py +23 -1
- vectordb_bench/models.py +45 -1
- vectordb_bench/results/getLeaderboardData.py +1 -1
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/METADATA +228 -14
- vectordb_bench-0.0.12.dist-info/RECORD +115 -0
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/entry_points.txt +1 -0
- vectordb_bench-0.0.10.dist-info/RECORD +0 -88
- /vectordb_bench/frontend/{const → config}/dbPrices.py +0 -0
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/top_level.txt +0 -0
vectordb_bench/frontend/{const → config}/dbCaseConfigs.py CHANGED

```diff
@@ -1,42 +1,147 @@
-from enum import IntEnum
+from enum import IntEnum, Enum
 import typing
 from pydantic import BaseModel
 from vectordb_bench.backend.cases import CaseLabel, CaseType
 from vectordb_bench.backend.clients import DB
 from vectordb_bench.backend.clients.api import IndexType
+from vectordb_bench.frontend.components.custom.getCustomConfig import get_custom_configs

-from vectordb_bench.models import CaseConfigParamType
+from vectordb_bench.models import CaseConfig, CaseConfigParamType

 MAX_STREAMLIT_INT = (1 << 53) - 1

-DB_LIST = [d for d in DB]
+DB_LIST = [d for d in DB if d != DB.Test]
+
+
+class Delimiter(Enum):
+    Line = "line"
+
+
+class BatchCaseConfig(BaseModel):
+    label: str = ""
+    description: str = ""
+    cases: list[CaseConfig] = []
+
+
+class UICaseItem(BaseModel):
+    isLine: bool = False
+    label: str = ""
+    description: str = ""
+    cases: list[CaseConfig] = []
+    caseLabel: CaseLabel = CaseLabel.Performance
+
+    def __init__(
+        self,
+        isLine: bool = False,
+        case_id: CaseType = None,
+        custom_case: dict = {},
+        cases: list[CaseConfig] = [],
+        label: str = "",
+        description: str = "",
+        caseLabel: CaseLabel = CaseLabel.Performance,
+    ):
+        if isLine is True:
+            super().__init__(isLine=True)
+        elif case_id is not None and isinstance(case_id, CaseType):
+            c = case_id.case_cls(custom_case)
+            super().__init__(
+                label=c.name,
+                description=c.description,
+                cases=[CaseConfig(case_id=case_id, custom_case=custom_case)],
+                caseLabel=c.label,
+            )
+        else:
+            super().__init__(
+                label=label,
+                description=description,
+                cases=cases,
+                caseLabel=caseLabel,
+            )
+
+    def __hash__(self) -> int:
+        return hash(self.json())
+
+
+class UICaseItemCluster(BaseModel):
+    label: str = ""
+    uiCaseItems: list[UICaseItem] = []
+
+
+def get_custom_case_items() -> list[UICaseItem]:
+    custom_configs = get_custom_configs()
+    return [
+        UICaseItem(
+            case_id=CaseType.PerformanceCustomDataset, custom_case=custom_config.dict()
+        )
+        for custom_config in custom_configs
+    ]
+
+
+def get_custom_case_cluter() -> UICaseItemCluster:
+    return UICaseItemCluster(
+        label="Custom Search Performance Test", uiCaseItems=get_custom_case_items()
+    )
+
+
+UI_CASE_CLUSTERS: list[UICaseItemCluster] = [
+    UICaseItemCluster(
+        label="Search Performance Test",
+        uiCaseItems=[
+            UICaseItem(case_id=CaseType.Performance768D100M),
+            UICaseItem(case_id=CaseType.Performance768D10M),
+            UICaseItem(case_id=CaseType.Performance768D1M),
+            UICaseItem(isLine=True),
+            UICaseItem(case_id=CaseType.Performance1536D5M),
+            UICaseItem(case_id=CaseType.Performance1536D500K),
+            UICaseItem(case_id=CaseType.Performance1536D50K),
+        ],
+    ),
+    UICaseItemCluster(
+        label="Filter Search Performance Test",
+        uiCaseItems=[
+            UICaseItem(case_id=CaseType.Performance768D10M1P),
+            UICaseItem(case_id=CaseType.Performance768D10M99P),
+            UICaseItem(case_id=CaseType.Performance768D1M1P),
+            UICaseItem(case_id=CaseType.Performance768D1M99P),
+            UICaseItem(isLine=True),
+            UICaseItem(case_id=CaseType.Performance1536D5M1P),
+            UICaseItem(case_id=CaseType.Performance1536D5M99P),
+            UICaseItem(case_id=CaseType.Performance1536D500K1P),
+            UICaseItem(case_id=CaseType.Performance1536D500K99P),
+        ],
+    ),
+    UICaseItemCluster(
+        label="Capacity Test",
+        uiCaseItems=[
+            UICaseItem(case_id=CaseType.CapacityDim960),
+            UICaseItem(case_id=CaseType.CapacityDim128),
+        ],
+    ),
+]

-DIVIDER = "DIVIDER"
-
+# DIVIDER = "DIVIDER"
+DISPLAY_CASE_ORDER: list[CaseType] = [
     CaseType.Performance768D100M,
     CaseType.Performance768D10M,
     CaseType.Performance768D1M,
-    DIVIDER,
     CaseType.Performance1536D5M,
     CaseType.Performance1536D500K,
-
+    CaseType.Performance1536D50K,
     CaseType.Performance768D10M1P,
     CaseType.Performance768D1M1P,
-    DIVIDER,
     CaseType.Performance1536D5M1P,
     CaseType.Performance1536D500K1P,
-    DIVIDER,
     CaseType.Performance768D10M99P,
     CaseType.Performance768D1M99P,
-    DIVIDER,
     CaseType.Performance1536D5M99P,
     CaseType.Performance1536D500K99P,
-    DIVIDER,
     CaseType.CapacityDim960,
     CaseType.CapacityDim128,
 ]
+CASE_NAME_ORDER = [case.case_cls().name for case in DISPLAY_CASE_ORDER]

-CASE_LIST = [
+# CASE_LIST = [
+#     item for item in CASE_LIST_WITH_DIVIDER if isinstance(item, CaseType)]


 class InputType(IntEnum):
@@ -52,7 +157,7 @@ class CaseConfigInput(BaseModel):
     inputHelp: str = ""
     displayLabel: str = ""
     # todo type should be a function
-    isDisplayed: typing.Any = lambda
+    isDisplayed: typing.Any = lambda config: True


 CaseConfigParamInput_IndexType = CaseConfigInput(
@@ -145,7 +250,7 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
 CaseConfigParamInput_maintenance_work_mem_PgVector = CaseConfigInput(
     label=CaseConfigParamType.maintenance_work_mem,
     inputHelp="Recommended value: 1.33x the index size, not to exceed the available free memory."
-
+    "Specify in gigabytes. e.g. 8GB",
     inputType=InputType.Text,
     inputConfig={
         "value": "8GB",
@@ -156,7 +261,7 @@ CaseConfigParamInput_max_parallel_workers_PgVector = CaseConfigInput(
     label=CaseConfigParamType.max_parallel_workers,
     displayLabel="Max parallel workers",
     inputHelp="Recommended value: (cpu cores - 1). This will set the parameters: max_parallel_maintenance_workers,"
-
+    " max_parallel_workers & table(parallel_workers)",
     inputType=InputType.Number,
     inputConfig={
         "min": 0,
@@ -513,7 +618,8 @@ CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
         "options": ["x4", "x8", "x16", "x32", "x64"],
     },
     isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
-    == "product"
+    == "product"
+    and config.get(CaseConfigParamType.IndexType, None)
     in [
         IndexType.HNSW.value,
         IndexType.IVFFlat.value,
@@ -581,22 +687,24 @@ ESPerformanceConfig = [
     CaseConfigParamInput_NumCandidates_ES,
 ]

-PgVectorLoadingConfig = [
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+PgVectorLoadingConfig = [
+    CaseConfigParamInput_IndexType_PgVector,
+    CaseConfigParamInput_Lists_PgVector,
+    CaseConfigParamInput_m,
+    CaseConfigParamInput_EFConstruction_PgVector,
+    CaseConfigParamInput_maintenance_work_mem_PgVector,
+    CaseConfigParamInput_max_parallel_workers_PgVector,
+]
+PgVectorPerformanceConfig = [
+    CaseConfigParamInput_IndexType_PgVector,
+    CaseConfigParamInput_m,
+    CaseConfigParamInput_EFConstruction_PgVector,
+    CaseConfigParamInput_EFSearch_PgVector,
+    CaseConfigParamInput_Lists_PgVector,
+    CaseConfigParamInput_Probes_PgVector,
+    CaseConfigParamInput_maintenance_work_mem_PgVector,
+    CaseConfigParamInput_max_parallel_workers_PgVector,
+]

 PgVectoRSLoadingConfig = [
     CaseConfigParamInput_IndexType,
```
vectordb_bench/frontend/{const → config}/styles.py CHANGED

```diff
@@ -46,6 +46,7 @@ DB_TO_ICON = {
     DB.PgVectoRS: "https://assets.zilliz.com/PG_Vector_d464f2ef5f.png",
     DB.Redis: "https://assets.zilliz.com/Redis_Cloud_74b8bfef39.png",
     DB.Chroma: "https://assets.zilliz.com/chroma_ceb3f06ed7.png",
+    DB.AWSOpenSearch: "https://assets.zilliz.com/opensearch_1eee37584e.jpeg",
 }

 # RedisCloud color: #0D6EFD
@@ -59,4 +60,5 @@ COLOR_MAP = {
     DB.WeaviateCloud.value: "#20C997",
     DB.PgVector.value: "#4C779A",
     DB.Redis.value: "#0D6EFD",
+    DB.AWSOpenSearch.value: "#0DCAF0",
 }
```
vectordb_bench/frontend/pages/concurrent.py ADDED

```diff
@@ -0,0 +1,65 @@
+import streamlit as st
+from vectordb_bench.frontend.components.check_results.footer import footer
+from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
+from vectordb_bench.frontend.components.check_results.nav import (
+    NavToResults,
+    NavToRunTest,
+)
+from vectordb_bench.frontend.components.check_results.filters import getshownData
+from vectordb_bench.frontend.components.concurrent.charts import drawChartsByCase
+from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
+from vectordb_bench.frontend.config.styles import FAVICON
+from vectordb_bench.interface import benchMarkRunner
+from vectordb_bench.models import TestResult
+
+
+def main():
+    # set page config
+    st.set_page_config(
+        page_title="VDBBench Conc Perf",
+        page_icon=FAVICON,
+        layout="wide",
+        # initial_sidebar_state="collapsed",
+    )
+
+    # header
+    drawHeaderIcon(st)
+
+    allResults = benchMarkRunner.get_results()
+
+    def check_conc_data(res: TestResult):
+        case_results = res.results
+        count = 0
+        for case_result in case_results:
+            if len(case_result.metrics.conc_num_list) > 0:
+                count += 1
+
+        return count > 0
+
+    checkedResults = [res for res in allResults if check_conc_data(res)]
+
+    st.title("VectorDB Benchmark (Concurrent Performance)")
+
+    # results selector
+    resultSelectorContainer = st.sidebar.container()
+    shownData, _, showCaseNames = getshownData(checkedResults, resultSelectorContainer)
+
+    resultSelectorContainer.divider()
+
+    # nav
+    navContainer = st.sidebar.container()
+    NavToRunTest(navContainer)
+    NavToResults(navContainer)
+
+    # save or share
+    resultesContainer = st.sidebar.container()
+    getResults(resultesContainer, "vectordb_bench_concurrent")
+
+    drawChartsByCase(shownData, showCaseNames, st.container())
+
+    # footer
+    footer(st.container())
+
+
+if __name__ == "__main__":
+    main()
```
vectordb_bench/frontend/pages/custom.py ADDED

```diff
@@ -0,0 +1,64 @@
+import streamlit as st
+from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
+from vectordb_bench.frontend.components.custom.displayCustomCase import displayCustomCase
+from vectordb_bench.frontend.components.custom.displaypPrams import displayParams
+from vectordb_bench.frontend.components.custom.getCustomConfig import CustomCaseConfig, generate_custom_case, get_custom_configs, save_custom_configs
+from vectordb_bench.frontend.components.custom.initStyle import initStyle
+from vectordb_bench.frontend.config.styles import FAVICON, PAGE_TITLE
+
+
+class CustomCaseManager():
+    customCaseItems: list[CustomCaseConfig]
+
+    def __init__(self):
+        self.customCaseItems = get_custom_configs()
+
+    def addCase(self):
+        new_custom_case = generate_custom_case()
+        new_custom_case.dataset_config.name = f"{new_custom_case.dataset_config.name} {len(self.customCaseItems)}"
+        self.customCaseItems += [new_custom_case]
+        self.save()
+
+    def deleteCase(self, idx: int):
+        self.customCaseItems.pop(idx)
+        self.save()
+
+    def save(self):
+        save_custom_configs(self.customCaseItems)
+
+
+def main():
+    st.set_page_config(
+        page_title=PAGE_TITLE,
+        page_icon=FAVICON,
+        # layout="wide",
+        # initial_sidebar_state="collapsed",
+    )
+
+    # header
+    drawHeaderIcon(st)
+
+    # init style
+    initStyle(st)
+
+    st.title("Custom Dataset")
+    displayParams(st)
+    customCaseManager = CustomCaseManager()
+
+    for idx, customCase in enumerate(customCaseManager.customCaseItems):
+        expander = st.expander(customCase.dataset_config.name, expanded=True)
+        key = f"custom_case_{idx}"
+        displayCustomCase(customCase, expander, key=key)
+
+        columns = expander.columns(8)
+        columns[0].button(
+            "Save", key=f"{key}_", type="secondary", on_click=lambda: customCaseManager.save())
+        columns[1].button(":red[Delete]", key=f"{key}_delete", type="secondary",
+                          on_click=lambda: customCaseManager.deleteCase(idx))
+
+    st.button("\+ New Dataset", key=f"add_custom_configs",
+              type="primary", on_click=lambda: customCaseManager.addCase())
+
+
+if __name__ == "__main__":
+    main()
```
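One detail worth noting in the page above: the Delete buttons register `on_click=lambda: customCaseManager.deleteCase(idx)` inside a loop, and Python lambdas capture the loop variable by reference (late binding), so every callback created this way sees the final value of `idx` when it eventually runs. A standalone illustration of the behavior and of the usual default-argument fix, not package code:

```python
# Standalone illustration: lambdas created in a loop close over the loop
# variable itself, so all of them observe its final value.
callbacks = [lambda: print(f"delete {idx}") for idx in range(3)]
for cb in callbacks:
    cb()  # prints "delete 2" three times

# The usual fix is to bind the current value as a default argument.
callbacks = [lambda idx=idx: print(f"delete {idx}") for idx in range(3)]
for cb in callbacks:
    cb()  # prints "delete 0", "delete 1", "delete 2"
```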
vectordb_bench/frontend/pages/quries_per_dollar.py CHANGED

```diff
@@ -8,7 +8,7 @@ from vectordb_bench.frontend.components.check_results.nav import NavToResults, N
 from vectordb_bench.frontend.components.check_results.charts import drawMetricChart
 from vectordb_bench.frontend.components.check_results.filters import getshownData
 from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
-from vectordb_bench.frontend.
+from vectordb_bench.frontend.config.styles import *
 from vectordb_bench.interface import benchMarkRunner
 from vectordb_bench.metric import QURIES_PER_DOLLAR_METRIC

@@ -26,7 +26,7 @@ def main():

     # results selector
     resultSelectorContainer = st.sidebar.container()
-    shownData, _,
+    shownData, _, showCaseNames = getshownData(allResults, resultSelectorContainer)

     resultSelectorContainer.divider()

@@ -45,8 +45,8 @@ def main():
     priceMap = priceTable(priceTableContainer, shownData)

     # charts
-    for
-    data = [data for data in shownData if data["case_name"] ==
+    for caseName in showCaseNames:
+        data = [data for data in shownData if data["case_name"] == caseName]
         dataWithMetric = []
         metric = QURIES_PER_DOLLAR_METRIC
         for d in data:
@@ -56,7 +56,7 @@ def main():
                 d[metric] = d["qps"] / price * 3.6
                 dataWithMetric.append(d)
         if len(dataWithMetric) > 0:
-            chartContainer = st.expander(
+            chartContainer = st.expander(caseName, True)
             drawMetricChart(data, metric, chartContainer)

     # footer
```
vectordb_bench/frontend/pages/run_test.py CHANGED

```diff
@@ -5,6 +5,7 @@ from vectordb_bench.frontend.components.run_test.dbConfigSetting import dbConfig
 from vectordb_bench.frontend.components.run_test.dbSelector import dbSelector
 from vectordb_bench.frontend.components.run_test.generateTasks import generate_tasks
 from vectordb_bench.frontend.components.run_test.hideSidebar import hideSidebar
+from vectordb_bench.frontend.components.run_test.initStyle import initStyle
 from vectordb_bench.frontend.components.run_test.submitTask import submitTask
 from vectordb_bench.frontend.components.check_results.nav import NavToResults
 from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
@@ -15,6 +16,9 @@ def main():
     # set page config
     initRunTestPageConfig(st)

+    # init style
+    initStyle(st)
+
     # header
     drawHeaderIcon(st)

```
vectordb_bench/frontend/pages/tables.py ADDED

```diff
@@ -0,0 +1,24 @@
+import streamlit as st
+from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
+from vectordb_bench.frontend.components.tables.data import getNewResults
+from vectordb_bench.frontend.config.styles import FAVICON
+
+
+def main():
+    # set page config
+    st.set_page_config(
+        page_title="Table",
+        page_icon=FAVICON,
+        layout="wide",
+        # initial_sidebar_state="collapsed",
+    )
+
+    # header
+    drawHeaderIcon(st)
+
+    df = getNewResults()
+    st.dataframe(df, height=800)
+
+
+if __name__ == "__main__":
+    main()
```
vectordb_bench/frontend/utils.py CHANGED

```diff
@@ -1,6 +1,22 @@
-
+import random
+import string
+

 passwordKeys = ["password", "api_key"]
+
+
 def inputIsPassword(key: str) -> bool:
     return key.lower() in passwordKeys

+
+def addHorizontalLine(st):
+    st.markdown(
+        "<div style='border: 1px solid #cccccc60; margin-bottom: 24px;'></div>",
+        unsafe_allow_html=True,
+    )
+
+
+def generate_random_string(length):
+    letters = string.ascii_letters + string.digits
+    result = ''.join(random.choice(letters) for _ in range(length))
+    return result
```
vectordb_bench/frontend/vdb_benchmark.py CHANGED

```diff
@@ -6,7 +6,7 @@ from vectordb_bench.frontend.components.check_results.nav import NavToQuriesPerD
 from vectordb_bench.frontend.components.check_results.charts import drawCharts
 from vectordb_bench.frontend.components.check_results.filters import getshownData
 from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
-from vectordb_bench.frontend.
+from vectordb_bench.frontend.config.styles import *
 from vectordb_bench.interface import benchMarkRunner


@@ -24,7 +24,7 @@ def main():

     # results selector and filter
     resultSelectorContainer = st.sidebar.container()
-    shownData, failedTasks,
+    shownData, failedTasks, showCaseNames = getshownData(
         allResults, resultSelectorContainer
     )

@@ -40,7 +40,7 @@ def main():
     getResults(resultesContainer, "vectordb_bench")

     # charts
-    drawCharts(st, shownData, failedTasks,
+    drawCharts(st, shownData, failedTasks, showCaseNames)

     # footer
     footer(st.container())
```
vectordb_bench/interface.py CHANGED

```diff
@@ -1,38 +1,33 @@
-import
+import concurrent.futures
+import logging
+import multiprocessing as mp
 import pathlib
 import signal
-import
+import traceback
 import uuid
-import
-import multiprocessing as mp
+from enum import Enum
 from multiprocessing.connection import Connection

 import psutil
-from enum import Enum

 from . import config
-from .metric import Metric
-from .models import (
-    TaskConfig,
-    TestResult,
-    CaseResult,
-    LoadTimeoutError,
-    PerformanceTimeoutError,
-    ResultLabel,
-)
-from .backend.result_collector import ResultCollector
 from .backend.assembler import Assembler
-from .backend.task_runner import TaskRunner
 from .backend.data_source import DatasetSource
+from .backend.result_collector import ResultCollector
+from .backend.task_runner import TaskRunner
+from .metric import Metric
+from .models import (CaseResult, LoadTimeoutError, PerformanceTimeoutError,
+                     ResultLabel, TaskConfig, TaskStage, TestResult)

 log = logging.getLogger(__name__)

 global_result_future: concurrent.futures.Future | None = None

+
 class SIGNAL(Enum):
-    SUCCESS=0
-    ERROR=1
-    WIP=2
+    SUCCESS = 0
+    ERROR = 1
+    WIP = 2


 class BenchMarkRunner:
@@ -42,9 +37,11 @@ class BenchMarkRunner:
         self.drop_old: bool = True
         self.dataset_source: DatasetSource = DatasetSource.S3

+
     def set_drop_old(self, drop_old: bool):
         self.drop_old = drop_old

+
     def set_download_address(self, use_aliyun: bool):
         if use_aliyun:
             self.dataset_source = DatasetSource.AliyunOSS
@@ -152,13 +149,13 @@
             latest_runner, cached_load_duration = None, None
             for idx, runner in enumerate(running_task.case_runners):
                 case_res = CaseResult(
-                    result_id=idx,
                     metrics=Metric(),
                     task_config=runner.config,
                 )

                 # drop_old = False if latest_runner and runner == latest_runner else config.DROP_OLD
-                drop_old = config.DROP_OLD
+                # drop_old = config.DROP_OLD
+                drop_old = TaskStage.DROP_OLD in runner.config.stages
                 if latest_runner and runner == latest_runner:
                     drop_old = False
                 elif not self.drop_old:
@@ -167,7 +164,7 @@
                 log.info(f"[{idx+1}/{running_task.num_cases()}] start case: {runner.display()}, drop_old={drop_old}")
                 case_res.metrics = runner.run(drop_old)
                 log.info(f"[{idx+1}/{running_task.num_cases()}] finish case: {runner.display()}, "
-
+                         f"result={case_res.metrics}, label={case_res.label}")

                 # cache the latest succeeded runner
                 latest_runner = runner
@@ -193,7 +190,6 @@
                 c_results.append(case_res)
                 send_conn.send((SIGNAL.WIP, idx))

-
             test_result = TestResult(
                 run_id=running_task.run_id,
                 task_label=running_task.task_label,
@@ -204,7 +200,7 @@

             send_conn.send((SIGNAL.SUCCESS, None))
             send_conn.close()
-            log.info(f"
+            log.info(f"Success to finish task: label={running_task.task_label}, run_id={running_task.run_id}")

         except Exception as e:
             err_msg = f"An error occurs when running task={running_task.task_label}, run_id={running_task.run_id}, err={e}"
@@ -246,7 +242,7 @@
        called as soon as a child terminates.
        """
        children = psutil.Process().children(recursive=True)
-       for p in
+       for p in children:
            try:
                log.warning(f"sending SIGTERM to child process: {p}")
                p.send_signal(sig)
```
vectordb_bench/metric.py CHANGED

```diff
@@ -1,7 +1,7 @@
 import logging
 import numpy as np

-from dataclasses import dataclass
+from dataclasses import dataclass, field


 log = logging.getLogger(__name__)
@@ -19,6 +19,10 @@ class Metric:
     qps: float = 0.0
     serial_latency_p99: float = 0.0
     recall: float = 0.0
+    ndcg: float = 0.0
+    conc_num_list: list[int] = field(default_factory=list)
+    conc_qps_list: list[float] = field(default_factory=list)
+    conc_latency_p99_list: list[float] = field(default_factory=list)


 QURIES_PER_DOLLAR_METRIC = "QP$ (Quries per Dollar)"
@@ -60,3 +64,21 @@ def calc_recall(count: int, ground_truth: list[int], got: list[int]) -> float:
             recalls[i] = 1

     return np.mean(recalls)
+
+
+def get_ideal_dcg(k: int):
+    ideal_dcg = 0
+    for i in range(k):
+        ideal_dcg += 1 / np.log2(i+2)
+
+    return ideal_dcg
+
+
+def calc_ndcg(ground_truth: list[int], got: list[int], ideal_dcg: float) -> float:
+    dcg = 0
+    ground_truth = list(ground_truth)
+    for id in set(got):
+        if id in ground_truth:
+            idx = ground_truth.index(id)
+            dcg += 1 / np.log2(idx+2)
+    return dcg / ideal_dcg
```