vectordb-bench 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. vectordb_bench/__init__.py +19 -5
  2. vectordb_bench/backend/assembler.py +1 -1
  3. vectordb_bench/backend/cases.py +93 -27
  4. vectordb_bench/backend/clients/__init__.py +14 -0
  5. vectordb_bench/backend/clients/api.py +1 -1
  6. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +159 -0
  7. vectordb_bench/backend/clients/aws_opensearch/cli.py +44 -0
  8. vectordb_bench/backend/clients/aws_opensearch/config.py +58 -0
  9. vectordb_bench/backend/clients/aws_opensearch/run.py +125 -0
  10. vectordb_bench/backend/clients/milvus/cli.py +291 -0
  11. vectordb_bench/backend/clients/milvus/milvus.py +13 -6
  12. vectordb_bench/backend/clients/pgvector/cli.py +116 -0
  13. vectordb_bench/backend/clients/pgvector/config.py +1 -1
  14. vectordb_bench/backend/clients/pgvector/pgvector.py +7 -4
  15. vectordb_bench/backend/clients/redis/cli.py +74 -0
  16. vectordb_bench/backend/clients/test/cli.py +25 -0
  17. vectordb_bench/backend/clients/test/config.py +18 -0
  18. vectordb_bench/backend/clients/test/test.py +62 -0
  19. vectordb_bench/backend/clients/weaviate_cloud/cli.py +41 -0
  20. vectordb_bench/backend/clients/zilliz_cloud/cli.py +55 -0
  21. vectordb_bench/backend/dataset.py +27 -5
  22. vectordb_bench/backend/runner/mp_runner.py +14 -3
  23. vectordb_bench/backend/runner/serial_runner.py +7 -3
  24. vectordb_bench/backend/task_runner.py +76 -26
  25. vectordb_bench/cli/__init__.py +0 -0
  26. vectordb_bench/cli/cli.py +362 -0
  27. vectordb_bench/cli/vectordbbench.py +22 -0
  28. vectordb_bench/config-files/sample_config.yml +17 -0
  29. vectordb_bench/custom/custom_case.json +18 -0
  30. vectordb_bench/frontend/components/check_results/charts.py +6 -6
  31. vectordb_bench/frontend/components/check_results/data.py +23 -20
  32. vectordb_bench/frontend/components/check_results/expanderStyle.py +1 -1
  33. vectordb_bench/frontend/components/check_results/filters.py +20 -13
  34. vectordb_bench/frontend/components/check_results/headerIcon.py +1 -1
  35. vectordb_bench/frontend/components/check_results/priceTable.py +1 -1
  36. vectordb_bench/frontend/components/check_results/stPageConfig.py +1 -1
  37. vectordb_bench/frontend/components/concurrent/charts.py +79 -0
  38. vectordb_bench/frontend/components/custom/displayCustomCase.py +31 -0
  39. vectordb_bench/frontend/components/custom/displaypPrams.py +11 -0
  40. vectordb_bench/frontend/components/custom/getCustomConfig.py +40 -0
  41. vectordb_bench/frontend/components/custom/initStyle.py +15 -0
  42. vectordb_bench/frontend/components/run_test/autoRefresh.py +1 -1
  43. vectordb_bench/frontend/components/run_test/caseSelector.py +40 -28
  44. vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -5
  45. vectordb_bench/frontend/components/run_test/dbSelector.py +8 -14
  46. vectordb_bench/frontend/components/run_test/generateTasks.py +3 -5
  47. vectordb_bench/frontend/components/run_test/initStyle.py +14 -0
  48. vectordb_bench/frontend/components/run_test/submitTask.py +13 -5
  49. vectordb_bench/frontend/components/tables/data.py +44 -0
  50. vectordb_bench/frontend/{const → config}/dbCaseConfigs.py +140 -32
  51. vectordb_bench/frontend/{const → config}/styles.py +2 -0
  52. vectordb_bench/frontend/pages/concurrent.py +65 -0
  53. vectordb_bench/frontend/pages/custom.py +64 -0
  54. vectordb_bench/frontend/pages/quries_per_dollar.py +5 -5
  55. vectordb_bench/frontend/pages/run_test.py +4 -0
  56. vectordb_bench/frontend/pages/tables.py +24 -0
  57. vectordb_bench/frontend/utils.py +17 -1
  58. vectordb_bench/frontend/vdb_benchmark.py +3 -3
  59. vectordb_bench/interface.py +21 -25
  60. vectordb_bench/metric.py +23 -1
  61. vectordb_bench/models.py +45 -1
  62. vectordb_bench/results/getLeaderboardData.py +1 -1
  63. {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/METADATA +228 -14
  64. vectordb_bench-0.0.12.dist-info/RECORD +115 -0
  65. {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/WHEEL +1 -1
  66. {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/entry_points.txt +1 -0
  67. vectordb_bench-0.0.10.dist-info/RECORD +0 -88
  68. /vectordb_bench/frontend/{const → config}/dbPrices.py +0 -0
  69. {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/LICENSE +0 -0
  70. {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,79 @@
1
+ from vectordb_bench.frontend.components.check_results.expanderStyle import (
2
+ initMainExpanderStyle,
3
+ )
4
+ import plotly.express as px
5
+
6
+ from vectordb_bench.frontend.config.styles import COLOR_MAP
7
+
8
+
9
def drawChartsByCase(allData, showCaseNames: list[str], st):
    """Draw one expanded section per case, each containing a concurrency chart.

    Args:
        allData: iterable of dict-like records. Each record is read for the keys
            ``case_name``, ``db``, ``db_name`` and the parallel lists
            ``conc_num_list``, ``conc_qps_list`` and ``conc_latency_p99_list``.
        showCaseNames: case names to render, in display order.
        st: Streamlit module or container used to create the expanders.
    """
    initMainExpanderStyle(st)
    for caseName in showCaseNames:
        chartContainer = st.expander(caseName, True)

        # Flatten every matching record into one point per concurrency level.
        points = []
        for caseData in allData:
            if caseData["case_name"] != caseName:
                continue
            for i, concNum in enumerate(caseData["conc_num_list"]):
                points.append(
                    {
                        "conc_num": concNum,
                        "qps": caseData["conc_qps_list"][i],
                        # Scaled by 1000; drawChart labels this axis in ms.
                        "latency_p99": caseData["conc_latency_p99_list"][i] * 1000,
                        "db_name": caseData["db_name"],
                        "db": caseData["db"],
                    }
                )

        drawChart(points, chartContainer)
26
+
27
+
28
def getRange(metric, data, padding_multipliers):
    """Return a padded ``[low, high]`` axis range for ``metric`` over ``data``.

    Args:
        metric: key looked up in every record; a missing key counts as 0.
        data: non-empty sequence of dict-like records.
        padding_multipliers: two-item sequence ``(low, high)``. Each multiplier
            is applied to the value span (max - min) and the result is
            subtracted from the minimum / added to the maximum.

    Returns:
        A two-element list ``[padded_min, padded_max]``.

    Raises:
        ValueError: if ``data`` is empty (raised by ``min``).
    """
    values = [row.get(metric, 0) for row in data]
    low = min(values)
    high = max(values)
    span = high - low
    return [
        low - span * padding_multipliers[0],
        high + span * padding_multipliers[1],
    ]
37
+
38
+
39
def drawChart(data, st):
    """Plot QPS against P99 latency, one line per ``db_name``.

    Each point is labelled with its concurrency number. Nothing is drawn when
    ``data`` is empty.

    Args:
        data: list of dicts with the keys ``latency_p99``, ``qps``,
            ``conc_num`` and ``db_name``.
        st: Streamlit container that receives the chart.
    """
    if not data:
        return

    x = "latency_p99"
    y = "qps"
    xRange = getRange(x, data, [0.05, 0.1])
    yRange = getRange(y, data, [0.2, 0.1])

    # Points are connected in ascending concurrency order. Sort a copy so the
    # caller's list is left untouched.
    orderedData = sorted(data, key=lambda point: point["conc_num"])

    fig = px.line(
        orderedData,
        x=x,
        y=y,
        # Lines are coloured and grouped by db_name with plotly's default
        # palette; no explicit colour map is applied.
        color="db_name",
        color_discrete_map=None,
        line_group="db_name",
        text="conc_num",
        markers=True,
        hover_data={
            "conc_num": True,
        },
        height=720,
    )
    fig.update_xaxes(range=xRange, title_text="Latency P99 (ms)")
    fig.update_yaxes(range=yRange, title_text="QPS")
    fig.update_traces(textposition="bottom right", texttemplate="conc-%{text:,.4~r}")

    st.plotly_chart(
        fig,
        use_container_width=True,
    )
@@ -0,0 +1,31 @@
1
+
2
+ from vectordb_bench.frontend.components.custom.getCustomConfig import CustomCaseConfig
3
+
4
+
5
def displayCustomCase(customCase: CustomCaseConfig, st, key):
    """Render the edit form for one custom case and write the inputs back.

    Every widget is seeded from ``customCase`` and its returned value is stored
    straight back on ``customCase`` / ``customCase.dataset_config``, so the
    object is mutated in place.

    Args:
        customCase: the custom case being edited.
        st: Streamlit module or container to draw into.
        key: prefix that keeps the widget keys of different cases distinct.
    """
    datasetConfig = customCase.dataset_config

    columns = st.columns([1, 2])
    datasetConfig.name = columns[0].text_input(
        "Name", key=f"{key}_name", value=datasetConfig.name)
    # The suffix spelling is kept as-is: the same literal is the default case
    # name in CustomCaseConfig.
    customCase.name = f"{datasetConfig.name} (Performace Case)"
    datasetConfig.dir = columns[1].text_input(
        "Folder Path", key=f"{key}_dir", value=datasetConfig.dir)

    columns = st.columns(4)
    datasetConfig.dim = columns[0].number_input(
        "dim", key=f"{key}_dim", value=datasetConfig.dim)
    datasetConfig.size = columns[1].number_input(
        "size", key=f"{key}_size", value=datasetConfig.size)

    # Preselect the stored metric type. Without an index the selectbox starts
    # on the first option and that value would overwrite the saved setting.
    metricTypeOptions = ["L2", "Cosine", "IP"]
    if datasetConfig.metric_type in metricTypeOptions:
        metricTypeIndex = metricTypeOptions.index(datasetConfig.metric_type)
    else:
        metricTypeIndex = 0
    datasetConfig.metric_type = columns[2].selectbox(
        "metric type", key=f"{key}_metric_type", options=metricTypeOptions,
        index=metricTypeIndex)

    datasetConfig.file_count = columns[3].number_input(
        "train file count", key=f"{key}_file_count", value=datasetConfig.file_count)

    columns = st.columns(4)
    datasetConfig.use_shuffled = columns[0].checkbox(
        "use shuffled data", key=f"{key}_use_shuffled", value=datasetConfig.use_shuffled)
    datasetConfig.with_gt = columns[1].checkbox(
        "with groundtruth", key=f"{key}_with_gt", value=datasetConfig.with_gt)

    customCase.description = st.text_area(
        "description", key=f"{key}_description", value=customCase.description)
@@ -0,0 +1,11 @@
1
def displayParams(st):
    """Render the help text describing the custom-dataset form fields.

    Args:
        st: Streamlit module or container that receives the markdown.
    """
    helpText = """
    - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
    - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
    - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
    - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.

    - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.

    - `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
    """
    st.markdown(helpText)
@@ -0,0 +1,40 @@
1
+ import json
2
+
3
+ from pydantic import BaseModel
4
+
5
+ from vectordb_bench import config
6
+
7
+
8
class CustomDatasetConfig(BaseModel):
    """Settings describing a user-supplied dataset for a custom case."""

    # Display name; also used to build the owning case's name in the UI.
    name: str = "custom_dataset"
    # Folder containing the dataset files ("Folder Path" in the UI).
    dir: str = ""
    # Shown as "size" in the UI; presumably the number of vectors - confirm.
    size: int = 0
    # Shown as "dim" in the UI; presumably the vector dimension - confirm.
    dim: int = 0
    # The UI offers "L2", "Cosine" and "IP".
    metric_type: str = "L2"
    # Number of train files ("train file count" in the UI).
    file_count: int = 1
    # "use shuffled data" checkbox.
    use_shuffled: bool = False
    # "with groundtruth" checkbox.
    with_gt: bool = True
17
+
18
+
19
class CustomCaseConfig(BaseModel):
    """A custom benchmark case: label, description, timeouts and dataset."""

    # Default mirrors "<dataset name> (Performace Case)" as built by the UI.
    name: str = "custom_dataset (Performace Case)"
    description: str = ""
    # NOTE(review): unit is not stated here; presumably seconds - confirm.
    load_timeout: int = 36000
    optimize_timeout: int = 36000
    dataset_config: CustomDatasetConfig = CustomDatasetConfig()
25
+
26
+
27
def get_custom_configs():
    """Load every saved custom case from ``config.CUSTOM_CONFIG_DIR``.

    The file is expected to hold a JSON array of objects whose keys match the
    fields of ``CustomCaseConfig``.

    Returns:
        A list of ``CustomCaseConfig`` instances, in file order.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default
    # encoding, which differs between platforms.
    with open(config.CUSTOM_CONFIG_DIR, "r", encoding="utf-8") as f:
        raw_configs = json.load(f)
    return [CustomCaseConfig(**raw_config) for raw_config in raw_configs]
31
+
32
+
33
def save_custom_configs(custom_configs: list[CustomCaseConfig]):
    """Overwrite ``config.CUSTOM_CONFIG_DIR`` with the given custom cases.

    Args:
        custom_configs: the cases to persist. Each one is serialised with its
            ``dict()`` method and written as an element of a JSON array.
    """
    serialised = [custom_config.dict() for custom_config in custom_configs]
    # Pin the encoding so the file is written the same way on every platform.
    with open(config.CUSTOM_CONFIG_DIR, "w", encoding="utf-8") as f:
        json.dump(serialised, f, indent=4)
37
+
38
+
39
def generate_custom_case():
    """Return a new ``CustomCaseConfig`` populated with its default values."""
    newCase = CustomCaseConfig()
    return newCase
@@ -0,0 +1,15 @@
1
def initStyle(st):
    """Inject the page's CSS overrides via an inline ``<style>`` element.

    Args:
        st: Streamlit module or container that receives the markup.
    """
    css = """<style>
        /* expander - header */
        .main div[data-testid='stExpander'] summary p {font-size: 20px; font-weight: 600;}
        /*
        button {
        height: auto;
        padding-left: 8px !important;
        padding-right: 6px !important;
        }
        */
        </style>"""
    # unsafe_allow_html is set so the <style> tag is emitted as HTML.
    st.markdown(css, unsafe_allow_html=True)
@@ -1,5 +1,5 @@
1
1
  from streamlit_autorefresh import st_autorefresh
2
- from vectordb_bench.frontend.const.styles import *
2
+ from vectordb_bench.frontend.config.styles import *
3
3
 
4
4
 
5
5
  def autoRefresh():
@@ -1,9 +1,13 @@
1
- from vectordb_bench.frontend.const.styles import *
1
+
2
+ from vectordb_bench.frontend.config.styles import *
2
3
  from vectordb_bench.backend.cases import CaseType
3
- from vectordb_bench.frontend.const.dbCaseConfigs import *
4
+ from vectordb_bench.frontend.config.dbCaseConfigs import *
5
+ from collections import defaultdict
6
+
7
+ from vectordb_bench.frontend.utils import addHorizontalLine
4
8
 
5
9
 
6
- def caseSelector(st, activedDbList):
10
+ def caseSelector(st, activedDbList: list[DB]):
7
11
  st.markdown(
8
12
  "<div style='height: 24px;'></div>",
9
13
  unsafe_allow_html=True,
@@ -14,41 +18,49 @@ def caseSelector(st, activedDbList):
14
18
  unsafe_allow_html=True,
15
19
  )
16
20
 
17
- caseIsActived = {case: False for case in CASE_LIST}
18
- allCaseConfigs = {db: {case: {} for case in CASE_LIST} for db in DB_LIST}
19
- for caseOrDivider in CASE_LIST_WITH_DIVIDER:
20
- if caseOrDivider == DIVIDER:
21
- caseItemContainer.markdown(
22
- "<div style='border: 1px solid #cccccc60; margin-bottom: 24px;'></div>",
23
- unsafe_allow_html=True,
24
- )
21
+ activedCaseList: list[CaseConfig] = []
22
+ dbToCaseClusterConfigs = defaultdict(lambda: defaultdict(dict))
23
+ dbToCaseConfigs = defaultdict(lambda: defaultdict(dict))
24
+ caseClusters = UI_CASE_CLUSTERS + [get_custom_case_cluter()]
25
+ for caseCluster in caseClusters:
26
+ activedCaseList += caseClusterExpander(
27
+ st, caseCluster, dbToCaseClusterConfigs, activedDbList)
28
+ for db in dbToCaseClusterConfigs:
29
+ for uiCaseItem in dbToCaseClusterConfigs[db]:
30
+ for case in uiCaseItem.cases:
31
+ dbToCaseConfigs[db][case] = dbToCaseClusterConfigs[db][uiCaseItem]
32
+
33
+ return activedCaseList, dbToCaseConfigs
34
+
35
+
36
+ def caseClusterExpander(st, caseCluster: UICaseItemCluster, dbToCaseClusterConfigs, activedDbList: list[DB]):
37
+ expander = st.expander(caseCluster.label, False)
38
+ activedCases: list[CaseConfig] = []
39
+ for uiCaseItem in caseCluster.uiCaseItems:
40
+ if uiCaseItem.isLine:
41
+ addHorizontalLine(expander)
25
42
  else:
26
- case = caseOrDivider
27
- caseItemContainer = st.container()
28
- caseIsActived[case] = caseItem(
29
- caseItemContainer, allCaseConfigs, case, activedDbList
30
- )
31
- activedCaseList = [case for case in CASE_LIST if caseIsActived[case]]
32
- return activedCaseList, allCaseConfigs
43
+ activedCases += caseItemCheckbox(expander,
44
+ dbToCaseClusterConfigs, uiCaseItem, activedDbList)
45
+ return activedCases
33
46
 
34
47
 
35
- def caseItem(st, allCaseConfigs, case: CaseType, activedDbList):
36
- selected = st.checkbox(case.case_name)
48
+ def caseItemCheckbox(st, dbToCaseClusterConfigs, uiCaseItem: UICaseItem, activedDbList: list[DB]):
49
+ selected = st.checkbox(uiCaseItem.label)
37
50
  st.markdown(
38
- f"<div style='color: #1D2939; margin: -8px 0 20px {CHECKBOX_INDENT}px; font-size: 14px;'>{case.case_description}</div>",
51
+ f"<div style='color: #1D2939; margin: -8px 0 20px {CHECKBOX_INDENT}px; font-size: 14px;'>{uiCaseItem.description}</div>",
39
52
  unsafe_allow_html=True,
40
53
  )
41
54
 
42
55
  if selected:
43
- caseConfigSettingContainer = st.container()
44
56
  caseConfigSetting(
45
- caseConfigSettingContainer, allCaseConfigs, case, activedDbList
57
+ st.container(), dbToCaseClusterConfigs, uiCaseItem, activedDbList
46
58
  )
47
59
 
48
- return selected
60
+ return uiCaseItem.cases if selected else []
49
61
 
50
62
 
51
- def caseConfigSetting(st, allCaseConfigs, case, activedDbList):
63
+ def caseConfigSetting(st, dbToCaseClusterConfigs, uiCaseItem: UICaseItem, activedDbList: list[DB]):
52
64
  for db in activedDbList:
53
65
  columns = st.columns(1 + CASE_CONFIG_SETTING_COLUMNS)
54
66
  # column 0 - title
@@ -57,12 +69,12 @@ def caseConfigSetting(st, allCaseConfigs, case, activedDbList):
57
69
  f"<div style='margin: 0 0 24px {CHECKBOX_INDENT}px; font-size: 18px; font-weight: 600;'>{db.name}</div>",
58
70
  unsafe_allow_html=True,
59
71
  )
60
- caseConfig = allCaseConfigs[db][case]
61
72
  k = 0
62
- for config in CASE_CONFIG_MAP.get(db, {}).get(case.case_cls().label, []):
73
+ caseConfig = dbToCaseClusterConfigs[db][uiCaseItem]
74
+ for config in CASE_CONFIG_MAP.get(db, {}).get(uiCaseItem.caseLabel, []):
63
75
  if config.isDisplayed(caseConfig):
64
76
  column = columns[1 + k % CASE_CONFIG_SETTING_COLUMNS]
65
- key = "%s-%s-%s" % (db, case, config.label.value)
77
+ key = "%s-%s-%s" % (db, uiCaseItem.label, config.label.value)
66
78
  if config.inputType == InputType.Text:
67
79
  caseConfig[config.label] = column.text_input(
68
80
  config.displayLabel if config.displayLabel else config.label.value,
@@ -1,13 +1,9 @@
1
1
  from pydantic import ValidationError
2
- from vectordb_bench.frontend.const.styles import *
2
+ from vectordb_bench.frontend.config.styles import *
3
3
  from vectordb_bench.frontend.utils import inputIsPassword
4
4
 
5
5
 
6
6
  def dbConfigSettings(st, activedDbList):
7
- st.markdown(
8
- "<style> .streamlit-expanderHeader p {font-size: 20px; font-weight: 600;}</style>",
9
- unsafe_allow_html=True,
10
- )
11
7
  expander = st.expander("Configurations for the selected databases", True)
12
8
 
13
9
  dbConfigs = {}
@@ -1,5 +1,6 @@
1
- from vectordb_bench.frontend.const.styles import *
2
- from vectordb_bench.frontend.const.dbCaseConfigs import DB_LIST
1
+ from streamlit.runtime.media_file_storage import MediaFileStorageError
2
+ from vectordb_bench.frontend.config.styles import DB_SELECTOR_COLUMNS, DB_TO_ICON
3
+ from vectordb_bench.frontend.config.dbCaseConfigs import DB_LIST
3
4
 
4
5
 
5
6
  def dbSelector(st):
@@ -16,21 +17,14 @@ def dbSelector(st):
16
17
  dbContainerColumns = st.columns(DB_SELECTOR_COLUMNS, gap="small")
17
18
  dbIsActived = {db: False for db in DB_LIST}
18
19
 
19
- # style - image; column gap; checkbox font;
20
- st.markdown(
21
- """
22
- <style>
23
- div[data-testid='stImage'] {margin: auto;}
24
- div[data-testid='stHorizontalBlock'] {gap: 8px;}
25
- .stCheckbox p { color: #000; font-size: 18px; font-weight: 600; }
26
- </style>
27
- """,
28
- unsafe_allow_html=True,
29
- )
30
20
  for i, db in enumerate(DB_LIST):
31
21
  column = dbContainerColumns[i % DB_SELECTOR_COLUMNS]
32
22
  dbIsActived[db] = column.checkbox(db.name)
33
- column.image(DB_TO_ICON.get(db, ""))
23
+ try:
24
+ column.image(DB_TO_ICON.get(db, ""))
25
+ except MediaFileStorageError as e:
26
+ column.warning(f"{db.name} image not available")
27
+ pass
34
28
  activedDbList = [db for db in DB_LIST if dbIsActived[db]]
35
29
 
36
30
  return activedDbList
@@ -1,17 +1,15 @@
1
+ from vectordb_bench.backend.clients import DB
1
2
  from vectordb_bench.models import CaseConfig, CaseConfigParamType, TaskConfig
2
3
 
3
4
 
4
- def generate_tasks(activedDbList, dbConfigs, activedCaseList, allCaseConfigs):
5
+ def generate_tasks(activedDbList: list[DB], dbConfigs, activedCaseList: list[CaseConfig], allCaseConfigs):
5
6
  tasks = []
6
7
  for db in activedDbList:
7
8
  for case in activedCaseList:
8
9
  task = TaskConfig(
9
10
  db=db.value,
10
11
  db_config=dbConfigs[db],
11
- case_config=CaseConfig(
12
- case_id=case.value,
13
- custom_case={},
14
- ),
12
+ case_config=case,
15
13
  db_case_config=db.case_config_cls(
16
14
  allCaseConfigs[db][case].get(CaseConfigParamType.IndexType, None)
17
15
  )(**{key.value: value for key, value in allCaseConfigs[db][case].items()}),
@@ -0,0 +1,14 @@
1
def initStyle(st):
    """Inject the run-test page's CSS overrides via an inline ``<style>`` element.

    Args:
        st: Streamlit module or container that receives the markup.
    """
    css = """<style>
        /* expander - header */
        .main div[data-testid='stExpander'] p {font-size: 18px; font-weight: 600;}
        /* db icon */
        div[data-testid='stImage'] {margin: auto;}
        /* db column gap */
        div[data-testid='stHorizontalBlock'] {gap: 8px;}
        /* check box */
        .stCheckbox p { color: #000; font-size: 18px; font-weight: 600; }
        </style>"""
    # unsafe_allow_html is set so the <style> tag is emitted as HTML.
    st.markdown(css, unsafe_allow_html=True)
@@ -1,5 +1,5 @@
1
1
  from datetime import datetime
2
- from vectordb_bench.frontend.const.styles import *
2
+ from vectordb_bench.frontend.config.styles import *
3
3
  from vectordb_bench.interface import benchMarkRunner
4
4
 
5
5
 
@@ -37,22 +37,30 @@ def taskLabelInput(st):
37
37
  def advancedSettings(st):
38
38
  container = st.columns([1, 2])
39
39
  index_already_exists = container[0].checkbox("Index already exists", value=False)
40
- container[1].caption("if actived, inserting and building will be skipped.")
40
+ container[1].caption("if selected, inserting and building will be skipped.")
41
41
 
42
42
  container = st.columns([1, 2])
43
43
  use_aliyun = container[0].checkbox("Dataset from Aliyun (Shanghai)", value=False)
44
44
  container[1].caption(
45
- "if actived, the dataset will be downloaded from Aliyun OSS shanghai, default AWS S3 aws-us-west."
45
+ "if selected, the dataset will be downloaded from Aliyun OSS shanghai, default AWS S3 aws-us-west."
46
46
  )
47
47
 
48
- return index_already_exists, use_aliyun
48
+ container = st.columns([1, 2])
49
+ k = container[0].number_input("k",min_value=1, value=100, label_visibility="collapsed")
50
+ container[1].caption(
51
+ "K value for number of nearest neighbors to search"
52
+ )
53
+
54
+ return index_already_exists, use_aliyun, k
49
55
 
50
56
 
51
57
  def controlPanel(st, tasks, taskLabel, isAllValid):
52
- index_already_exists, use_aliyun = advancedSettings(st)
58
+ index_already_exists, use_aliyun, k = advancedSettings(st)
53
59
 
54
60
  def runHandler():
55
61
  benchMarkRunner.set_drop_old(not index_already_exists)
62
+ for task in tasks:
63
+ task.case_config.k = k
56
64
  benchMarkRunner.set_download_address(use_aliyun)
57
65
  benchMarkRunner.run(tasks, taskLabel)
58
66
 
@@ -0,0 +1,44 @@
1
+ from dataclasses import asdict
2
+ from vectordb_bench.backend.cases import CaseType
3
+ from vectordb_bench.interface import benchMarkRunner
4
+ from vectordb_bench.models import CaseResult, ResultLabel
5
+ import pandas as pd
6
+
7
+
8
def getNewResults():
    """Collect every NORMAL-labelled case result into a DataFrame.

    Returns:
        A ``pandas.DataFrame`` built from ``formatData`` over all case results
        whose ``label`` equals ``ResultLabel.NORMAL``.
    """
    normalResults: list[CaseResult] = [
        caseResult
        for testResult in benchMarkRunner.get_results()
        for caseResult in testResult.results
        if caseResult.label == ResultLabel.NORMAL
    ]
    return pd.DataFrame(formatData(normalResults))
21
+
22
+
23
def formatData(caseResults: list[CaseResult]):
    """Flatten case results into plain dict rows suitable for a table.

    Args:
        caseResults: the case results to convert.

    Returns:
        One dict per result with the keys ``db``, ``db_label``, ``case_name``,
        ``dataset`` and ``filter_rate``, followed by every field of the
        result's metrics. A metrics field with the same name as one of those
        keys takes precedence, because the metrics are merged last.
    """
    rows = []
    for caseResult in caseResults:
        taskConfig = caseResult.task_config
        # NOTE(review): case_cls() is called without arguments; confirm that
        # this also works for custom cases.
        case = taskConfig.case_config.case_id.case_cls()
        rows.append(
            {
                "db": taskConfig.db.value,
                "db_label": taskConfig.db_config.db_label,
                "case_name": case.name,
                "dataset": case.dataset.data.name,
                "filter_rate": case.filter_rate,
                **asdict(caseResult.metrics),
            }
        )
    return rows