vectordb-bench 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +19 -5
- vectordb_bench/backend/assembler.py +1 -1
- vectordb_bench/backend/cases.py +93 -27
- vectordb_bench/backend/clients/__init__.py +14 -0
- vectordb_bench/backend/clients/api.py +1 -1
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +159 -0
- vectordb_bench/backend/clients/aws_opensearch/cli.py +44 -0
- vectordb_bench/backend/clients/aws_opensearch/config.py +58 -0
- vectordb_bench/backend/clients/aws_opensearch/run.py +125 -0
- vectordb_bench/backend/clients/milvus/cli.py +291 -0
- vectordb_bench/backend/clients/milvus/milvus.py +13 -6
- vectordb_bench/backend/clients/pgvector/cli.py +116 -0
- vectordb_bench/backend/clients/pgvector/config.py +1 -1
- vectordb_bench/backend/clients/pgvector/pgvector.py +7 -4
- vectordb_bench/backend/clients/redis/cli.py +74 -0
- vectordb_bench/backend/clients/test/cli.py +25 -0
- vectordb_bench/backend/clients/test/config.py +18 -0
- vectordb_bench/backend/clients/test/test.py +62 -0
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +41 -0
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +55 -0
- vectordb_bench/backend/dataset.py +27 -5
- vectordb_bench/backend/runner/mp_runner.py +14 -3
- vectordb_bench/backend/runner/serial_runner.py +7 -3
- vectordb_bench/backend/task_runner.py +76 -26
- vectordb_bench/cli/__init__.py +0 -0
- vectordb_bench/cli/cli.py +362 -0
- vectordb_bench/cli/vectordbbench.py +22 -0
- vectordb_bench/config-files/sample_config.yml +17 -0
- vectordb_bench/custom/custom_case.json +18 -0
- vectordb_bench/frontend/components/check_results/charts.py +6 -6
- vectordb_bench/frontend/components/check_results/data.py +23 -20
- vectordb_bench/frontend/components/check_results/expanderStyle.py +1 -1
- vectordb_bench/frontend/components/check_results/filters.py +20 -13
- vectordb_bench/frontend/components/check_results/headerIcon.py +1 -1
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -1
- vectordb_bench/frontend/components/check_results/stPageConfig.py +1 -1
- vectordb_bench/frontend/components/concurrent/charts.py +79 -0
- vectordb_bench/frontend/components/custom/displayCustomCase.py +31 -0
- vectordb_bench/frontend/components/custom/displaypPrams.py +11 -0
- vectordb_bench/frontend/components/custom/getCustomConfig.py +40 -0
- vectordb_bench/frontend/components/custom/initStyle.py +15 -0
- vectordb_bench/frontend/components/run_test/autoRefresh.py +1 -1
- vectordb_bench/frontend/components/run_test/caseSelector.py +40 -28
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -5
- vectordb_bench/frontend/components/run_test/dbSelector.py +8 -14
- vectordb_bench/frontend/components/run_test/generateTasks.py +3 -5
- vectordb_bench/frontend/components/run_test/initStyle.py +14 -0
- vectordb_bench/frontend/components/run_test/submitTask.py +13 -5
- vectordb_bench/frontend/components/tables/data.py +44 -0
- vectordb_bench/frontend/{const → config}/dbCaseConfigs.py +140 -32
- vectordb_bench/frontend/{const → config}/styles.py +2 -0
- vectordb_bench/frontend/pages/concurrent.py +65 -0
- vectordb_bench/frontend/pages/custom.py +64 -0
- vectordb_bench/frontend/pages/quries_per_dollar.py +5 -5
- vectordb_bench/frontend/pages/run_test.py +4 -0
- vectordb_bench/frontend/pages/tables.py +24 -0
- vectordb_bench/frontend/utils.py +17 -1
- vectordb_bench/frontend/vdb_benchmark.py +3 -3
- vectordb_bench/interface.py +21 -25
- vectordb_bench/metric.py +23 -1
- vectordb_bench/models.py +45 -1
- vectordb_bench/results/getLeaderboardData.py +1 -1
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/METADATA +228 -14
- vectordb_bench-0.0.12.dist-info/RECORD +115 -0
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/entry_points.txt +1 -0
- vectordb_bench-0.0.10.dist-info/RECORD +0 -88
- /vectordb_bench/frontend/{const → config}/dbPrices.py +0 -0
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,79 @@
|
|
1
|
+
from vectordb_bench.frontend.components.check_results.expanderStyle import (
|
2
|
+
initMainExpanderStyle,
|
3
|
+
)
|
4
|
+
import plotly.express as px
|
5
|
+
|
6
|
+
from vectordb_bench.frontend.config.styles import COLOR_MAP
|
7
|
+
|
8
|
+
|
9
|
+
def drawChartsByCase(allData, showCaseNames: list[str], st):
    """Draw one expanded chart section per requested case name.

    Args:
        allData: iterable of result dicts; each must provide ``case_name``,
            ``db``, ``db_name`` and the parallel lists ``conc_num_list``,
            ``conc_qps_list`` and ``conc_latency_p99_list``.
        showCaseNames: case names to render, in display order.
        st: object exposing ``expander`` (the streamlit module or a container).
    """
    initMainExpanderStyle(st)
    for caseName in showCaseNames:
        chartContainer = st.expander(caseName, True)
        points = []
        for row in allData:
            if row["case_name"] != caseName:
                continue
            # Flatten the parallel per-concurrency lists into one point each.
            for idx, conc_num in enumerate(row["conc_num_list"]):
                points.append(
                    {
                        "conc_num": conc_num,
                        "qps": row["conc_qps_list"][idx],
                        # Scaled by 1000; drawChart labels this axis in ms.
                        "latency_p99": row["conc_latency_p99_list"][idx] * 1000,
                        "db_name": row["db_name"],
                        "db": row["db"],
                    }
                )
        drawChart(points, chartContainer)
|
26
|
+
|
27
|
+
|
28
|
+
def getRange(metric, data, padding_multipliers):
    """Return a padded ``[low, high]`` axis range for ``metric``.

    Args:
        metric: key looked up in every row; rows lacking it count as 0.
        data: non-empty sequence of dict-like rows.
        padding_multipliers: two factors; the value spread (max - min) times
            the first is subtracted from the minimum, and times the second is
            added to the maximum.

    Returns:
        A two-element list ``[padded_min, padded_max]``.
    """
    values = [row.get(metric, 0) for row in data]
    low = min(values)
    high = max(values)
    spread = high - low
    lower_bound = low - spread * padding_multipliers[0]
    upper_bound = high + spread * padding_multipliers[1]
    return [lower_bound, upper_bound]
|
37
|
+
|
38
|
+
|
39
|
+
def drawChart(data, st):
    """Plot QPS against P99 latency, one line per ``db_name``.

    Args:
        data: list of point dicts with ``latency_p99``, ``qps``, ``conc_num``
            and ``db_name`` keys. Nothing is drawn when it is empty.
        st: object exposing ``plotly_chart`` (e.g. a streamlit container).

    The input list is left untouched; a sorted copy is plotted.
    """
    if not data:
        return

    x = "latency_p99"
    xrange = getRange(x, data, [0.05, 0.1])

    y = "qps"
    yrange = getRange(y, data, [0.2, 0.1])

    # Lines are both grouped and colored by db_name, using plotly's default
    # palette (no explicit color map).
    series = "db_name"

    # Order points by concurrency so each line is traced from low to high
    # concurrency; sorted() avoids reordering the caller's list.
    points = sorted(data, key=lambda point: point["conc_num"])

    fig = px.line(
        points,
        x=x,
        y=y,
        color=series,
        color_discrete_map=None,
        line_group=series,
        text="conc_num",
        markers=True,
        hover_data={
            "conc_num": True,
        },
        height=720,
    )
    fig.update_xaxes(range=xrange, title_text="Latency P99 (ms)")
    fig.update_yaxes(range=yrange, title_text="QPS")
    fig.update_traces(textposition="bottom right", texttemplate="conc-%{text:,.4~r}")

    st.plotly_chart(
        fig,
        use_container_width=True,
    )
|
@@ -0,0 +1,31 @@
|
|
1
|
+
|
2
|
+
from vectordb_bench.frontend.components.custom.getCustomConfig import CustomCaseConfig
|
3
|
+
|
4
|
+
|
5
|
+
def displayCustomCase(customCase: CustomCaseConfig, st, key):
    """Render the edit form for one custom case and write the inputs back.

    Every widget is seeded with the value currently stored on ``customCase``
    and its result is assigned back to the same attribute, so ``customCase``
    is updated in place.

    Args:
        customCase: the case being edited (mutated in place).
        st: object providing the streamlit widget API (``columns``,
            ``text_area``).
        key: prefix used to build a unique widget key per field.
    """
    dataset = customCase.dataset_config

    columns = st.columns([1, 2])
    dataset.name = columns[0].text_input(
        "Name", key=f"{key}_name", value=dataset.name)
    # The case name is always derived from the dataset name.
    customCase.name = f"{dataset.name} (Performace Case)"
    dataset.dir = columns[1].text_input(
        "Folder Path", key=f"{key}_dir", value=dataset.dir)

    columns = st.columns(4)
    dataset.dim = columns[0].number_input(
        "dim", key=f"{key}_dim", value=dataset.dim)
    dataset.size = columns[1].number_input(
        "size", key=f"{key}_size", value=dataset.size)
    # Preselect the stored metric type. Without an explicit index the
    # selectbox starts on the first option and a stored "Cosine"/"IP" would
    # be overwritten with "L2" on first render.
    metric_options = ["L2", "Cosine", "IP"]
    current_metric = dataset.metric_type
    metric_index = (
        metric_options.index(current_metric)
        if current_metric in metric_options
        else 0
    )
    dataset.metric_type = columns[2].selectbox(
        "metric type", key=f"{key}_metric_type", options=metric_options,
        index=metric_index)
    dataset.file_count = columns[3].number_input(
        "train file count", key=f"{key}_file_count", value=dataset.file_count)

    columns = st.columns(4)
    dataset.use_shuffled = columns[0].checkbox(
        "use shuffled data", key=f"{key}_use_shuffled", value=dataset.use_shuffled)
    dataset.with_gt = columns[1].checkbox(
        "with groundtruth", key=f"{key}_with_gt", value=dataset.with_gt)

    customCase.description = st.text_area(
        "description", key=f"{key}_description", value=customCase.description)
|
@@ -0,0 +1,11 @@
|
|
1
|
+
def displayParams(st):
    """Render the help text describing the custom-dataset parameters.

    Emits a single markdown block through ``st.markdown`` covering the
    expected folder layout and file naming, the split-file naming scheme and
    the shuffled-data option.
    """
    st.markdown("""
- `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
- Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
- Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
- Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.

- `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.

- `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
""")
|
@@ -0,0 +1,40 @@
|
|
1
|
+
import json
|
2
|
+
|
3
|
+
from pydantic import BaseModel
|
4
|
+
|
5
|
+
from vectordb_bench import config
|
6
|
+
|
7
|
+
|
8
|
+
class CustomDatasetConfig(BaseModel):
    """Pydantic model holding the settings of a user-supplied dataset."""

    # Dataset name.
    name: str = "custom_dataset"
    # Dataset location, empty by default (the custom-case form labels this
    # field "Folder Path").
    dir: str = ""
    # NOTE(review): presumably the number of vectors in the dataset — confirm.
    size: int = 0
    # NOTE(review): presumably the vector dimensionality — confirm.
    dim: int = 0
    # Metric name; the custom-case form offers "L2", "Cosine" and "IP".
    metric_type: str = "L2"
    # Labelled "train file count" in the custom-case form.
    file_count: int = 1
    # Labelled "use shuffled data" in the custom-case form.
    use_shuffled: bool = False
    # Labelled "with groundtruth" in the custom-case form.
    with_gt: bool = True
|
17
|
+
|
18
|
+
|
19
|
+
class CustomCaseConfig(BaseModel):
    """Pydantic model for one custom case: metadata, timeouts and dataset."""

    # Display name; the default follows the "<dataset name> (Performace Case)"
    # pattern also produced by the custom-case form.
    name: str = "custom_dataset (Performace Case)"
    # Free-text description, empty by default.
    description: str = ""
    # NOTE(review): timeout units are not visible here — presumably seconds;
    # confirm against the code that consumes them.
    load_timeout: int = 36000
    optimize_timeout: int = 36000
    # Settings of the dataset this case runs against.
    dataset_config: CustomDatasetConfig = CustomDatasetConfig()
|
25
|
+
|
26
|
+
|
27
|
+
def get_custom_configs():
    """Load the custom cases stored in the file at ``config.CUSTOM_CONFIG_DIR``.

    Returns:
        A list with one ``CustomCaseConfig`` per entry of the JSON document.
    """
    with open(config.CUSTOM_CONFIG_DIR, "r") as config_file:
        raw_entries = json.load(config_file)

    loaded_cases = []
    for entry in raw_entries:
        loaded_cases.append(CustomCaseConfig(**entry))
    return loaded_cases
|
31
|
+
|
32
|
+
|
33
|
+
def save_custom_configs(custom_configs: list[CustomCaseConfig]):
    """Write the custom cases to the file at ``config.CUSTOM_CONFIG_DIR``.

    Each config is serialised with its ``dict()`` method and the resulting
    list is dumped as indented JSON, replacing the previous file content.

    Args:
        custom_configs: the case configs to persist. These are the objects
            returned by ``get_custom_configs``, i.e. ``CustomCaseConfig``.
    """
    serialised = [custom_config.dict() for custom_config in custom_configs]
    with open(config.CUSTOM_CONFIG_DIR, "w") as f:
        json.dump(serialised, f, indent=4)
|
37
|
+
|
38
|
+
|
39
|
+
def generate_custom_case():
    """Create a new custom case populated entirely with default values."""
    default_case = CustomCaseConfig()
    return default_case
|
@@ -0,0 +1,15 @@
|
|
1
|
+
def initStyle(st):
    """Inject this page's CSS overrides through ``st.markdown``.

    The stylesheet sets the font size and weight of expander header text.
    The ``button`` rule inside it is commented out in the CSS itself and
    therefore has no effect.
    """
    st.markdown(
        """<style>
/* expander - header */
.main div[data-testid='stExpander'] summary p {font-size: 20px; font-weight: 600;}
/*
button {
height: auto;
padding-left: 8px !important;
padding-right: 6px !important;
}
*/
</style>""",
        # Required so the raw <style> tag is emitted as HTML.
        unsafe_allow_html=True,
    )
|
@@ -1,9 +1,13 @@
|
|
1
|
-
|
1
|
+
|
2
|
+
from vectordb_bench.frontend.config.styles import *
|
2
3
|
from vectordb_bench.backend.cases import CaseType
|
3
|
-
from vectordb_bench.frontend.
|
4
|
+
from vectordb_bench.frontend.config.dbCaseConfigs import *
|
5
|
+
from collections import defaultdict
|
6
|
+
|
7
|
+
from vectordb_bench.frontend.utils import addHorizontalLine
|
4
8
|
|
5
9
|
|
6
|
-
def caseSelector(st, activedDbList):
|
10
|
+
def caseSelector(st, activedDbList: list[DB]):
|
7
11
|
st.markdown(
|
8
12
|
"<div style='height: 24px;'></div>",
|
9
13
|
unsafe_allow_html=True,
|
@@ -14,41 +18,49 @@ def caseSelector(st, activedDbList):
|
|
14
18
|
unsafe_allow_html=True,
|
15
19
|
)
|
16
20
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
activedCaseList: list[CaseConfig] = []
|
22
|
+
dbToCaseClusterConfigs = defaultdict(lambda: defaultdict(dict))
|
23
|
+
dbToCaseConfigs = defaultdict(lambda: defaultdict(dict))
|
24
|
+
caseClusters = UI_CASE_CLUSTERS + [get_custom_case_cluter()]
|
25
|
+
for caseCluster in caseClusters:
|
26
|
+
activedCaseList += caseClusterExpander(
|
27
|
+
st, caseCluster, dbToCaseClusterConfigs, activedDbList)
|
28
|
+
for db in dbToCaseClusterConfigs:
|
29
|
+
for uiCaseItem in dbToCaseClusterConfigs[db]:
|
30
|
+
for case in uiCaseItem.cases:
|
31
|
+
dbToCaseConfigs[db][case] = dbToCaseClusterConfigs[db][uiCaseItem]
|
32
|
+
|
33
|
+
return activedCaseList, dbToCaseConfigs
|
34
|
+
|
35
|
+
|
36
|
+
def caseClusterExpander(st, caseCluster: UICaseItemCluster, dbToCaseClusterConfigs, activedDbList: list[DB]):
|
37
|
+
expander = st.expander(caseCluster.label, False)
|
38
|
+
activedCases: list[CaseConfig] = []
|
39
|
+
for uiCaseItem in caseCluster.uiCaseItems:
|
40
|
+
if uiCaseItem.isLine:
|
41
|
+
addHorizontalLine(expander)
|
25
42
|
else:
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
caseItemContainer, allCaseConfigs, case, activedDbList
|
30
|
-
)
|
31
|
-
activedCaseList = [case for case in CASE_LIST if caseIsActived[case]]
|
32
|
-
return activedCaseList, allCaseConfigs
|
43
|
+
activedCases += caseItemCheckbox(expander,
|
44
|
+
dbToCaseClusterConfigs, uiCaseItem, activedDbList)
|
45
|
+
return activedCases
|
33
46
|
|
34
47
|
|
35
|
-
def
|
36
|
-
selected = st.checkbox(
|
48
|
+
def caseItemCheckbox(st, dbToCaseClusterConfigs, uiCaseItem: UICaseItem, activedDbList: list[DB]):
|
49
|
+
selected = st.checkbox(uiCaseItem.label)
|
37
50
|
st.markdown(
|
38
|
-
f"<div style='color: #1D2939; margin: -8px 0 20px {CHECKBOX_INDENT}px; font-size: 14px;'>{
|
51
|
+
f"<div style='color: #1D2939; margin: -8px 0 20px {CHECKBOX_INDENT}px; font-size: 14px;'>{uiCaseItem.description}</div>",
|
39
52
|
unsafe_allow_html=True,
|
40
53
|
)
|
41
54
|
|
42
55
|
if selected:
|
43
|
-
caseConfigSettingContainer = st.container()
|
44
56
|
caseConfigSetting(
|
45
|
-
|
57
|
+
st.container(), dbToCaseClusterConfigs, uiCaseItem, activedDbList
|
46
58
|
)
|
47
59
|
|
48
|
-
return selected
|
60
|
+
return uiCaseItem.cases if selected else []
|
49
61
|
|
50
62
|
|
51
|
-
def caseConfigSetting(st,
|
63
|
+
def caseConfigSetting(st, dbToCaseClusterConfigs, uiCaseItem: UICaseItem, activedDbList: list[DB]):
|
52
64
|
for db in activedDbList:
|
53
65
|
columns = st.columns(1 + CASE_CONFIG_SETTING_COLUMNS)
|
54
66
|
# column 0 - title
|
@@ -57,12 +69,12 @@ def caseConfigSetting(st, allCaseConfigs, case, activedDbList):
|
|
57
69
|
f"<div style='margin: 0 0 24px {CHECKBOX_INDENT}px; font-size: 18px; font-weight: 600;'>{db.name}</div>",
|
58
70
|
unsafe_allow_html=True,
|
59
71
|
)
|
60
|
-
caseConfig = allCaseConfigs[db][case]
|
61
72
|
k = 0
|
62
|
-
|
73
|
+
caseConfig = dbToCaseClusterConfigs[db][uiCaseItem]
|
74
|
+
for config in CASE_CONFIG_MAP.get(db, {}).get(uiCaseItem.caseLabel, []):
|
63
75
|
if config.isDisplayed(caseConfig):
|
64
76
|
column = columns[1 + k % CASE_CONFIG_SETTING_COLUMNS]
|
65
|
-
key = "%s-%s-%s" % (db,
|
77
|
+
key = "%s-%s-%s" % (db, uiCaseItem.label, config.label.value)
|
66
78
|
if config.inputType == InputType.Text:
|
67
79
|
caseConfig[config.label] = column.text_input(
|
68
80
|
config.displayLabel if config.displayLabel else config.label.value,
|
@@ -1,13 +1,9 @@
|
|
1
1
|
from pydantic import ValidationError
|
2
|
-
from vectordb_bench.frontend.
|
2
|
+
from vectordb_bench.frontend.config.styles import *
|
3
3
|
from vectordb_bench.frontend.utils import inputIsPassword
|
4
4
|
|
5
5
|
|
6
6
|
def dbConfigSettings(st, activedDbList):
|
7
|
-
st.markdown(
|
8
|
-
"<style> .streamlit-expanderHeader p {font-size: 20px; font-weight: 600;}</style>",
|
9
|
-
unsafe_allow_html=True,
|
10
|
-
)
|
11
7
|
expander = st.expander("Configurations for the selected databases", True)
|
12
8
|
|
13
9
|
dbConfigs = {}
|
@@ -1,5 +1,6 @@
|
|
1
|
-
from
|
2
|
-
from vectordb_bench.frontend.
|
1
|
+
from streamlit.runtime.media_file_storage import MediaFileStorageError
|
2
|
+
from vectordb_bench.frontend.config.styles import DB_SELECTOR_COLUMNS, DB_TO_ICON
|
3
|
+
from vectordb_bench.frontend.config.dbCaseConfigs import DB_LIST
|
3
4
|
|
4
5
|
|
5
6
|
def dbSelector(st):
|
@@ -16,21 +17,14 @@ def dbSelector(st):
|
|
16
17
|
dbContainerColumns = st.columns(DB_SELECTOR_COLUMNS, gap="small")
|
17
18
|
dbIsActived = {db: False for db in DB_LIST}
|
18
19
|
|
19
|
-
# style - image; column gap; checkbox font;
|
20
|
-
st.markdown(
|
21
|
-
"""
|
22
|
-
<style>
|
23
|
-
div[data-testid='stImage'] {margin: auto;}
|
24
|
-
div[data-testid='stHorizontalBlock'] {gap: 8px;}
|
25
|
-
.stCheckbox p { color: #000; font-size: 18px; font-weight: 600; }
|
26
|
-
</style>
|
27
|
-
""",
|
28
|
-
unsafe_allow_html=True,
|
29
|
-
)
|
30
20
|
for i, db in enumerate(DB_LIST):
|
31
21
|
column = dbContainerColumns[i % DB_SELECTOR_COLUMNS]
|
32
22
|
dbIsActived[db] = column.checkbox(db.name)
|
33
|
-
|
23
|
+
try:
|
24
|
+
column.image(DB_TO_ICON.get(db, ""))
|
25
|
+
except MediaFileStorageError as e:
|
26
|
+
column.warning(f"{db.name} image not available")
|
27
|
+
pass
|
34
28
|
activedDbList = [db for db in DB_LIST if dbIsActived[db]]
|
35
29
|
|
36
30
|
return activedDbList
|
@@ -1,17 +1,15 @@
|
|
1
|
+
from vectordb_bench.backend.clients import DB
|
1
2
|
from vectordb_bench.models import CaseConfig, CaseConfigParamType, TaskConfig
|
2
3
|
|
3
4
|
|
4
|
-
def generate_tasks(activedDbList, dbConfigs, activedCaseList, allCaseConfigs):
|
5
|
+
def generate_tasks(activedDbList: list[DB], dbConfigs, activedCaseList: list[CaseConfig], allCaseConfigs):
|
5
6
|
tasks = []
|
6
7
|
for db in activedDbList:
|
7
8
|
for case in activedCaseList:
|
8
9
|
task = TaskConfig(
|
9
10
|
db=db.value,
|
10
11
|
db_config=dbConfigs[db],
|
11
|
-
case_config=
|
12
|
-
case_id=case.value,
|
13
|
-
custom_case={},
|
14
|
-
),
|
12
|
+
case_config=case,
|
15
13
|
db_case_config=db.case_config_cls(
|
16
14
|
allCaseConfigs[db][case].get(CaseConfigParamType.IndexType, None)
|
17
15
|
)(**{key.value: value for key, value in allCaseConfigs[db][case].items()}),
|
@@ -0,0 +1,14 @@
|
|
1
|
+
def initStyle(st):
    """Inject this page's CSS overrides through ``st.markdown``.

    The stylesheet covers expander text, centring of images, the gap between
    horizontal blocks and the checkbox label font.
    """
    st.markdown(
        """<style>
/* expander - header */
.main div[data-testid='stExpander'] p {font-size: 18px; font-weight: 600;}
/* db icon */
div[data-testid='stImage'] {margin: auto;}
/* db column gap */
div[data-testid='stHorizontalBlock'] {gap: 8px;}
/* check box */
.stCheckbox p { color: #000; font-size: 18px; font-weight: 600; }
</style>""",
        # Required so the raw <style> tag is emitted as HTML.
        unsafe_allow_html=True,
    )
|
@@ -1,5 +1,5 @@
|
|
1
1
|
from datetime import datetime
|
2
|
-
from vectordb_bench.frontend.
|
2
|
+
from vectordb_bench.frontend.config.styles import *
|
3
3
|
from vectordb_bench.interface import benchMarkRunner
|
4
4
|
|
5
5
|
|
@@ -37,22 +37,30 @@ def taskLabelInput(st):
|
|
37
37
|
def advancedSettings(st):
|
38
38
|
container = st.columns([1, 2])
|
39
39
|
index_already_exists = container[0].checkbox("Index already exists", value=False)
|
40
|
-
container[1].caption("if
|
40
|
+
container[1].caption("if selected, inserting and building will be skipped.")
|
41
41
|
|
42
42
|
container = st.columns([1, 2])
|
43
43
|
use_aliyun = container[0].checkbox("Dataset from Aliyun (Shanghai)", value=False)
|
44
44
|
container[1].caption(
|
45
|
-
"if
|
45
|
+
"if selected, the dataset will be downloaded from Aliyun OSS shanghai, default AWS S3 aws-us-west."
|
46
46
|
)
|
47
47
|
|
48
|
-
|
48
|
+
container = st.columns([1, 2])
|
49
|
+
k = container[0].number_input("k",min_value=1, value=100, label_visibility="collapsed")
|
50
|
+
container[1].caption(
|
51
|
+
"K value for number of nearest neighbors to search"
|
52
|
+
)
|
53
|
+
|
54
|
+
return index_already_exists, use_aliyun, k
|
49
55
|
|
50
56
|
|
51
57
|
def controlPanel(st, tasks, taskLabel, isAllValid):
|
52
|
-
index_already_exists, use_aliyun = advancedSettings(st)
|
58
|
+
index_already_exists, use_aliyun, k = advancedSettings(st)
|
53
59
|
|
54
60
|
def runHandler():
|
55
61
|
benchMarkRunner.set_drop_old(not index_already_exists)
|
62
|
+
for task in tasks:
|
63
|
+
task.case_config.k = k
|
56
64
|
benchMarkRunner.set_download_address(use_aliyun)
|
57
65
|
benchMarkRunner.run(tasks, taskLabel)
|
58
66
|
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from dataclasses import asdict
|
2
|
+
from vectordb_bench.backend.cases import CaseType
|
3
|
+
from vectordb_bench.interface import benchMarkRunner
|
4
|
+
from vectordb_bench.models import CaseResult, ResultLabel
|
5
|
+
import pandas as pd
|
6
|
+
|
7
|
+
|
8
|
+
def getNewResults():
    """Collect every NORMAL-labelled case result into a DataFrame.

    Walks all results returned by ``benchMarkRunner.get_results()``, keeps the
    case results whose label is ``ResultLabel.NORMAL`` and returns them as a
    ``pandas.DataFrame`` built from ``formatData``.
    """
    normal_results: list[CaseResult] = [
        case_result
        for test_result in benchMarkRunner.get_results()
        for case_result in test_result.results
        if case_result.label == ResultLabel.NORMAL
    ]
    return pd.DataFrame(formatData(normal_results))
|
21
|
+
|
22
|
+
|
23
|
+
def formatData(caseResults: list[CaseResult]):
    """Flatten case results into plain dict rows.

    Args:
        caseResults: results to convert. Each must carry a ``task_config``
            (with ``db``, ``db_config`` and ``case_config``) and a dataclass
            ``metrics`` instance.

    Returns:
        One dict per result holding ``db``, ``db_label``, ``case_name``,
        ``dataset`` and ``filter_rate``, followed by every field of the
        result's metrics.
    """
    data = []
    for caseResult in caseResults:
        task_config = caseResult.task_config
        # The case is instantiated to read its name, filter rate and dataset.
        case = task_config.case_config.case_id.case_cls()
        data.append(
            {
                "db": task_config.db.value,
                "db_label": task_config.db_config.db_label,
                "case_name": case.name,
                "dataset": case.dataset.data.name,
                "filter_rate": case.filter_rate,
                # Metrics go last, so a metric field with the same name as a
                # key above would override it.
                **asdict(caseResult.metrics),
            }
        )
    return data
|