vectordb-bench 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. vectordb_bench/__init__.py +19 -5
  2. vectordb_bench/backend/assembler.py +1 -1
  3. vectordb_bench/backend/cases.py +93 -27
  4. vectordb_bench/backend/clients/__init__.py +14 -0
  5. vectordb_bench/backend/clients/api.py +1 -1
  6. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +159 -0
  7. vectordb_bench/backend/clients/aws_opensearch/cli.py +44 -0
  8. vectordb_bench/backend/clients/aws_opensearch/config.py +58 -0
  9. vectordb_bench/backend/clients/aws_opensearch/run.py +125 -0
  10. vectordb_bench/backend/clients/milvus/cli.py +291 -0
  11. vectordb_bench/backend/clients/milvus/milvus.py +13 -6
  12. vectordb_bench/backend/clients/pgvector/cli.py +116 -0
  13. vectordb_bench/backend/clients/pgvector/config.py +1 -1
  14. vectordb_bench/backend/clients/pgvector/pgvector.py +7 -4
  15. vectordb_bench/backend/clients/redis/cli.py +74 -0
  16. vectordb_bench/backend/clients/test/cli.py +25 -0
  17. vectordb_bench/backend/clients/test/config.py +18 -0
  18. vectordb_bench/backend/clients/test/test.py +62 -0
  19. vectordb_bench/backend/clients/weaviate_cloud/cli.py +41 -0
  20. vectordb_bench/backend/clients/zilliz_cloud/cli.py +55 -0
  21. vectordb_bench/backend/dataset.py +27 -5
  22. vectordb_bench/backend/runner/mp_runner.py +14 -3
  23. vectordb_bench/backend/runner/serial_runner.py +7 -3
  24. vectordb_bench/backend/task_runner.py +76 -26
  25. vectordb_bench/cli/__init__.py +0 -0
  26. vectordb_bench/cli/cli.py +362 -0
  27. vectordb_bench/cli/vectordbbench.py +22 -0
  28. vectordb_bench/config-files/sample_config.yml +17 -0
  29. vectordb_bench/custom/custom_case.json +18 -0
  30. vectordb_bench/frontend/components/check_results/charts.py +6 -6
  31. vectordb_bench/frontend/components/check_results/data.py +23 -20
  32. vectordb_bench/frontend/components/check_results/expanderStyle.py +1 -1
  33. vectordb_bench/frontend/components/check_results/filters.py +20 -13
  34. vectordb_bench/frontend/components/check_results/headerIcon.py +1 -1
  35. vectordb_bench/frontend/components/check_results/priceTable.py +1 -1
  36. vectordb_bench/frontend/components/check_results/stPageConfig.py +1 -1
  37. vectordb_bench/frontend/components/concurrent/charts.py +79 -0
  38. vectordb_bench/frontend/components/custom/displayCustomCase.py +31 -0
  39. vectordb_bench/frontend/components/custom/displaypPrams.py +11 -0
  40. vectordb_bench/frontend/components/custom/getCustomConfig.py +40 -0
  41. vectordb_bench/frontend/components/custom/initStyle.py +15 -0
  42. vectordb_bench/frontend/components/run_test/autoRefresh.py +1 -1
  43. vectordb_bench/frontend/components/run_test/caseSelector.py +40 -28
  44. vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -5
  45. vectordb_bench/frontend/components/run_test/dbSelector.py +8 -14
  46. vectordb_bench/frontend/components/run_test/generateTasks.py +3 -5
  47. vectordb_bench/frontend/components/run_test/initStyle.py +14 -0
  48. vectordb_bench/frontend/components/run_test/submitTask.py +13 -5
  49. vectordb_bench/frontend/components/tables/data.py +44 -0
  50. vectordb_bench/frontend/{const → config}/dbCaseConfigs.py +140 -32
  51. vectordb_bench/frontend/{const → config}/styles.py +2 -0
  52. vectordb_bench/frontend/pages/concurrent.py +65 -0
  53. vectordb_bench/frontend/pages/custom.py +64 -0
  54. vectordb_bench/frontend/pages/quries_per_dollar.py +5 -5
  55. vectordb_bench/frontend/pages/run_test.py +4 -0
  56. vectordb_bench/frontend/pages/tables.py +24 -0
  57. vectordb_bench/frontend/utils.py +17 -1
  58. vectordb_bench/frontend/vdb_benchmark.py +3 -3
  59. vectordb_bench/interface.py +21 -25
  60. vectordb_bench/metric.py +23 -1
  61. vectordb_bench/models.py +45 -1
  62. vectordb_bench/results/getLeaderboardData.py +1 -1
  63. {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/METADATA +228 -14
  64. vectordb_bench-0.0.12.dist-info/RECORD +115 -0
  65. {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/WHEEL +1 -1
  66. {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/entry_points.txt +1 -0
  67. vectordb_bench-0.0.10.dist-info/RECORD +0 -88
  68. /vectordb_bench/frontend/{const → config}/dbPrices.py +0 -0
  69. {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/LICENSE +0 -0
  70. {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.12.dist-info}/top_level.txt +0 -0
@@ -1,42 +1,147 @@
1
- from enum import IntEnum
1
+ from enum import IntEnum, Enum
2
2
  import typing
3
3
  from pydantic import BaseModel
4
4
  from vectordb_bench.backend.cases import CaseLabel, CaseType
5
5
  from vectordb_bench.backend.clients import DB
6
6
  from vectordb_bench.backend.clients.api import IndexType
7
+ from vectordb_bench.frontend.components.custom.getCustomConfig import get_custom_configs
7
8
 
8
- from vectordb_bench.models import CaseConfigParamType
9
+ from vectordb_bench.models import CaseConfig, CaseConfigParamType
9
10
 
10
11
  MAX_STREAMLIT_INT = (1 << 53) - 1
11
12
 
12
- DB_LIST = [d for d in DB]
13
+ DB_LIST = [d for d in DB if d != DB.Test]
14
+
15
+
16
+ class Delimiter(Enum):
17
+ Line = "line"
18
+
19
+
20
+ class BatchCaseConfig(BaseModel):
21
+ label: str = ""
22
+ description: str = ""
23
+ cases: list[CaseConfig] = []
24
+
25
+
26
class UICaseItem(BaseModel):
    # One selectable entry in the case-selection UI. An item is either a
    # visual separator (isLine=True), a single built-in/custom case built
    # from a CaseType, or an explicit bundle of CaseConfig objects.
    isLine: bool = False
    label: str = ""
    description: str = ""
    cases: list[CaseConfig] = []
    caseLabel: CaseLabel = CaseLabel.Performance

    def __init__(
        self,
        isLine: bool = False,
        case_id: typing.Optional[CaseType] = None,
        custom_case: typing.Optional[dict] = None,
        cases: typing.Optional[list[CaseConfig]] = None,
        label: str = "",
        description: str = "",
        caseLabel: CaseLabel = CaseLabel.Performance,
    ):
        """Build a UI item in one of three modes.

        Args:
            isLine: when True, create a separator item; every other
                argument is ignored.
            case_id: when it is a CaseType, label/description/caseLabel are
                taken from the instantiated case and ``cases`` holds a
                single CaseConfig for it.
            custom_case: settings forwarded to the case class and to
                CaseConfig; defaults to a fresh empty dict.
            cases: explicit case list used when no ``case_id`` is given;
                defaults to a fresh empty list.
            label: display label used when no ``case_id`` is given.
            description: display description used when no ``case_id`` is
                given.
            caseLabel: case category used when no ``case_id`` is given.
        """
        # Fresh containers per call: a `{}` / `[]` default in the signature
        # would be one shared object handed to every item that omits it.
        if custom_case is None:
            custom_case = {}
        if cases is None:
            cases = []

        if isLine is True:
            super().__init__(isLine=True)
        elif case_id is not None and isinstance(case_id, CaseType):
            c = case_id.case_cls(custom_case)
            super().__init__(
                label=c.name,
                description=c.description,
                cases=[CaseConfig(case_id=case_id, custom_case=custom_case)],
                caseLabel=c.label,
            )
        else:
            super().__init__(
                label=label,
                description=description,
                cases=cases,
                caseLabel=caseLabel,
            )

    def __hash__(self) -> int:
        """Hash the item by its JSON serialisation."""
        return hash(self.json())
63
+
64
+
65
+ class UICaseItemCluster(BaseModel):
66
+ label: str = ""
67
+ uiCaseItems: list[UICaseItem] = []
68
+
69
+
70
def get_custom_case_items() -> list[UICaseItem]:
    """Return one UICaseItem per stored custom dataset configuration."""
    items = []
    for custom_config in get_custom_configs():
        # Each custom config is passed on as a plain dict.
        item = UICaseItem(
            case_id=CaseType.PerformanceCustomDataset,
            custom_case=custom_config.dict(),
        )
        items.append(item)
    return items
78
+
79
+
80
def get_custom_case_cluter() -> UICaseItemCluster:
    """Wrap all custom case items in a single labelled cluster."""
    custom_items = get_custom_case_items()
    cluster = UICaseItemCluster(
        label="Custom Search Performance Test",
        uiCaseItems=custom_items,
    )
    return cluster
84
+
85
+
86
+ UI_CASE_CLUSTERS: list[UICaseItemCluster] = [
87
+ UICaseItemCluster(
88
+ label="Search Performance Test",
89
+ uiCaseItems=[
90
+ UICaseItem(case_id=CaseType.Performance768D100M),
91
+ UICaseItem(case_id=CaseType.Performance768D10M),
92
+ UICaseItem(case_id=CaseType.Performance768D1M),
93
+ UICaseItem(isLine=True),
94
+ UICaseItem(case_id=CaseType.Performance1536D5M),
95
+ UICaseItem(case_id=CaseType.Performance1536D500K),
96
+ UICaseItem(case_id=CaseType.Performance1536D50K),
97
+ ],
98
+ ),
99
+ UICaseItemCluster(
100
+ label="Filter Search Performance Test",
101
+ uiCaseItems=[
102
+ UICaseItem(case_id=CaseType.Performance768D10M1P),
103
+ UICaseItem(case_id=CaseType.Performance768D10M99P),
104
+ UICaseItem(case_id=CaseType.Performance768D1M1P),
105
+ UICaseItem(case_id=CaseType.Performance768D1M99P),
106
+ UICaseItem(isLine=True),
107
+ UICaseItem(case_id=CaseType.Performance1536D5M1P),
108
+ UICaseItem(case_id=CaseType.Performance1536D5M99P),
109
+ UICaseItem(case_id=CaseType.Performance1536D500K1P),
110
+ UICaseItem(case_id=CaseType.Performance1536D500K99P),
111
+ ],
112
+ ),
113
+ UICaseItemCluster(
114
+ label="Capacity Test",
115
+ uiCaseItems=[
116
+ UICaseItem(case_id=CaseType.CapacityDim960),
117
+ UICaseItem(case_id=CaseType.CapacityDim128),
118
+ ],
119
+ ),
120
+ ]
13
121
 
14
- DIVIDER = "DIVIDER"
15
- CASE_LIST_WITH_DIVIDER = [
122
+ # DIVIDER = "DIVIDER"
123
+ DISPLAY_CASE_ORDER: list[CaseType] = [
16
124
  CaseType.Performance768D100M,
17
125
  CaseType.Performance768D10M,
18
126
  CaseType.Performance768D1M,
19
- DIVIDER,
20
127
  CaseType.Performance1536D5M,
21
128
  CaseType.Performance1536D500K,
22
- DIVIDER,
129
+ CaseType.Performance1536D50K,
23
130
  CaseType.Performance768D10M1P,
24
131
  CaseType.Performance768D1M1P,
25
- DIVIDER,
26
132
  CaseType.Performance1536D5M1P,
27
133
  CaseType.Performance1536D500K1P,
28
- DIVIDER,
29
134
  CaseType.Performance768D10M99P,
30
135
  CaseType.Performance768D1M99P,
31
- DIVIDER,
32
136
  CaseType.Performance1536D5M99P,
33
137
  CaseType.Performance1536D500K99P,
34
- DIVIDER,
35
138
  CaseType.CapacityDim960,
36
139
  CaseType.CapacityDim128,
37
140
  ]
141
+ CASE_NAME_ORDER = [case.case_cls().name for case in DISPLAY_CASE_ORDER]
38
142
 
39
- CASE_LIST = [item for item in CASE_LIST_WITH_DIVIDER if isinstance(item, CaseType)]
143
+ # CASE_LIST = [
144
+ # item for item in CASE_LIST_WITH_DIVIDER if isinstance(item, CaseType)]
40
145
 
41
146
 
42
147
  class InputType(IntEnum):
@@ -52,7 +157,7 @@ class CaseConfigInput(BaseModel):
52
157
  inputHelp: str = ""
53
158
  displayLabel: str = ""
54
159
  # todo type should be a function
55
- isDisplayed: typing.Any = lambda x: True
160
+ isDisplayed: typing.Any = lambda config: True
56
161
 
57
162
 
58
163
  CaseConfigParamInput_IndexType = CaseConfigInput(
@@ -145,7 +250,7 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
145
250
  CaseConfigParamInput_maintenance_work_mem_PgVector = CaseConfigInput(
146
251
  label=CaseConfigParamType.maintenance_work_mem,
147
252
  inputHelp="Recommended value: 1.33x the index size, not to exceed the available free memory."
148
- "Specify in gigabytes. e.g. 8GB",
253
+ "Specify in gigabytes. e.g. 8GB",
149
254
  inputType=InputType.Text,
150
255
  inputConfig={
151
256
  "value": "8GB",
@@ -156,7 +261,7 @@ CaseConfigParamInput_max_parallel_workers_PgVector = CaseConfigInput(
156
261
  label=CaseConfigParamType.max_parallel_workers,
157
262
  displayLabel="Max parallel workers",
158
263
  inputHelp="Recommended value: (cpu cores - 1). This will set the parameters: max_parallel_maintenance_workers,"
159
- " max_parallel_workers & table(parallel_workers)",
264
+ " max_parallel_workers & table(parallel_workers)",
160
265
  inputType=InputType.Number,
161
266
  inputConfig={
162
267
  "min": 0,
@@ -513,7 +618,8 @@ CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
513
618
  "options": ["x4", "x8", "x16", "x32", "x64"],
514
619
  },
515
620
  isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
516
- == "product" and config.get(CaseConfigParamType.IndexType, None)
621
+ == "product"
622
+ and config.get(CaseConfigParamType.IndexType, None)
517
623
  in [
518
624
  IndexType.HNSW.value,
519
625
  IndexType.IVFFlat.value,
@@ -581,22 +687,24 @@ ESPerformanceConfig = [
581
687
  CaseConfigParamInput_NumCandidates_ES,
582
688
  ]
583
689
 
584
- PgVectorLoadingConfig = [CaseConfigParamInput_IndexType_PgVector,
585
- CaseConfigParamInput_Lists_PgVector,
586
- CaseConfigParamInput_m,
587
- CaseConfigParamInput_EFConstruction_PgVector,
588
- CaseConfigParamInput_maintenance_work_mem_PgVector,
589
- CaseConfigParamInput_max_parallel_workers_PgVector,
590
- ]
591
- PgVectorPerformanceConfig = [CaseConfigParamInput_IndexType_PgVector,
592
- CaseConfigParamInput_m,
593
- CaseConfigParamInput_EFConstruction_PgVector,
594
- CaseConfigParamInput_EFSearch_PgVector,
595
- CaseConfigParamInput_Lists_PgVector,
596
- CaseConfigParamInput_Probes_PgVector,
597
- CaseConfigParamInput_maintenance_work_mem_PgVector,
598
- CaseConfigParamInput_max_parallel_workers_PgVector,
599
- ]
690
+ PgVectorLoadingConfig = [
691
+ CaseConfigParamInput_IndexType_PgVector,
692
+ CaseConfigParamInput_Lists_PgVector,
693
+ CaseConfigParamInput_m,
694
+ CaseConfigParamInput_EFConstruction_PgVector,
695
+ CaseConfigParamInput_maintenance_work_mem_PgVector,
696
+ CaseConfigParamInput_max_parallel_workers_PgVector,
697
+ ]
698
+ PgVectorPerformanceConfig = [
699
+ CaseConfigParamInput_IndexType_PgVector,
700
+ CaseConfigParamInput_m,
701
+ CaseConfigParamInput_EFConstruction_PgVector,
702
+ CaseConfigParamInput_EFSearch_PgVector,
703
+ CaseConfigParamInput_Lists_PgVector,
704
+ CaseConfigParamInput_Probes_PgVector,
705
+ CaseConfigParamInput_maintenance_work_mem_PgVector,
706
+ CaseConfigParamInput_max_parallel_workers_PgVector,
707
+ ]
600
708
 
601
709
  PgVectoRSLoadingConfig = [
602
710
  CaseConfigParamInput_IndexType,
@@ -46,6 +46,7 @@ DB_TO_ICON = {
46
46
  DB.PgVectoRS: "https://assets.zilliz.com/PG_Vector_d464f2ef5f.png",
47
47
  DB.Redis: "https://assets.zilliz.com/Redis_Cloud_74b8bfef39.png",
48
48
  DB.Chroma: "https://assets.zilliz.com/chroma_ceb3f06ed7.png",
49
+ DB.AWSOpenSearch: "https://assets.zilliz.com/opensearch_1eee37584e.jpeg",
49
50
  }
50
51
 
51
52
  # RedisCloud color: #0D6EFD
@@ -59,4 +60,5 @@ COLOR_MAP = {
59
60
  DB.WeaviateCloud.value: "#20C997",
60
61
  DB.PgVector.value: "#4C779A",
61
62
  DB.Redis.value: "#0D6EFD",
63
+ DB.AWSOpenSearch.value: "#0DCAF0",
62
64
  }
@@ -0,0 +1,65 @@
1
+ import streamlit as st
2
+ from vectordb_bench.frontend.components.check_results.footer import footer
3
+ from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
4
+ from vectordb_bench.frontend.components.check_results.nav import (
5
+ NavToResults,
6
+ NavToRunTest,
7
+ )
8
+ from vectordb_bench.frontend.components.check_results.filters import getshownData
9
+ from vectordb_bench.frontend.components.concurrent.charts import drawChartsByCase
10
+ from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
11
+ from vectordb_bench.frontend.config.styles import FAVICON
12
+ from vectordb_bench.interface import benchMarkRunner
13
+ from vectordb_bench.models import TestResult
14
+
15
+
16
def main():
    """Render the concurrent-performance results page."""
    # set page config
    st.set_page_config(
        page_title="VDBBench Conc Perf",
        page_icon=FAVICON,
        layout="wide",
        # initial_sidebar_state="collapsed",
    )

    # header
    drawHeaderIcon(st)

    allResults = benchMarkRunner.get_results()

    def check_conc_data(res: TestResult):
        # A result qualifies when at least one of its case results carries
        # a non-empty list of concurrency levels.
        return any(
            len(case_result.metrics.conc_num_list) > 0
            for case_result in res.results
        )

    checkedResults = list(filter(check_conc_data, allResults))

    st.title("VectorDB Benchmark (Concurrent Performance)")

    # results selector
    resultSelectorContainer = st.sidebar.container()
    selection = getshownData(checkedResults, resultSelectorContainer)
    shownData, showCaseNames = selection[0], selection[2]

    resultSelectorContainer.divider()

    # nav
    navContainer = st.sidebar.container()
    NavToRunTest(navContainer)
    NavToResults(navContainer)

    # save or share
    resultesContainer = st.sidebar.container()
    getResults(resultesContainer, "vectordb_bench_concurrent")

    chartsContainer = st.container()
    drawChartsByCase(shownData, showCaseNames, chartsContainer)

    # footer
    footer(st.container())
62
+
63
+
64
+ if __name__ == "__main__":
65
+ main()
@@ -0,0 +1,64 @@
1
+ import streamlit as st
2
+ from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
3
+ from vectordb_bench.frontend.components.custom.displayCustomCase import displayCustomCase
4
+ from vectordb_bench.frontend.components.custom.displaypPrams import displayParams
5
+ from vectordb_bench.frontend.components.custom.getCustomConfig import CustomCaseConfig, generate_custom_case, get_custom_configs, save_custom_configs
6
+ from vectordb_bench.frontend.components.custom.initStyle import initStyle
7
+ from vectordb_bench.frontend.config.styles import FAVICON, PAGE_TITLE
8
+
9
+
10
class CustomCaseManager:
    """Holds the custom case configs and saves them after each change."""

    customCaseItems: list[CustomCaseConfig]

    def __init__(self):
        # Start from whatever get_custom_configs() returns.
        self.customCaseItems = get_custom_configs()

    def addCase(self):
        """Append a newly generated case and save the list."""
        case = generate_custom_case()
        # Suffix the generated name with the current item count.
        position = len(self.customCaseItems)
        case.dataset_config.name = f"{case.dataset_config.name} {position}"
        self.customCaseItems.append(case)
        self.save()

    def deleteCase(self, idx: int):
        """Remove the case at position ``idx`` and save the list."""
        del self.customCaseItems[idx]
        self.save()

    def save(self):
        """Pass the current list of cases to save_custom_configs."""
        save_custom_configs(self.customCaseItems)
28
+
29
+
30
def main():
    """Render the "Custom Dataset" page.

    Each stored custom case is shown in its own expander with Save and
    Delete buttons, followed by a button that adds a new case.
    """
    st.set_page_config(
        page_title=PAGE_TITLE,
        page_icon=FAVICON,
        # layout="wide",
        # initial_sidebar_state="collapsed",
    )

    # header
    drawHeaderIcon(st)

    # init style
    initStyle(st)

    st.title("Custom Dataset")
    displayParams(st)
    customCaseManager = CustomCaseManager()

    for idx, customCase in enumerate(customCaseManager.customCaseItems):
        expander = st.expander(customCase.dataset_config.name, expanded=True)
        key = f"custom_case_{idx}"
        displayCustomCase(customCase, expander, key=key)

        columns = expander.columns(8)
        columns[0].button(
            "Save",
            key=f"{key}_",
            type="secondary",
            on_click=customCaseManager.save,
        )
        # Pass idx through `args` so it is bound when the button is created.
        # A `lambda: ...deleteCase(idx)` would read idx only when invoked,
        # after the loop has finished, and always delete the last case.
        columns[1].button(
            ":red[Delete]",
            key=f"{key}_delete",
            type="secondary",
            on_click=customCaseManager.deleteCase,
            args=(idx,),
        )

    # The label keeps its literal backslash, written as a valid escape.
    st.button(
        "\\+ New Dataset",
        key="add_custom_configs",
        type="primary",
        on_click=customCaseManager.addCase,
    )
61
+
62
+
63
+ if __name__ == "__main__":
64
+ main()
@@ -8,7 +8,7 @@ from vectordb_bench.frontend.components.check_results.nav import NavToResults, N
8
8
  from vectordb_bench.frontend.components.check_results.charts import drawMetricChart
9
9
  from vectordb_bench.frontend.components.check_results.filters import getshownData
10
10
  from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
11
- from vectordb_bench.frontend.const.styles import *
11
+ from vectordb_bench.frontend.config.styles import *
12
12
  from vectordb_bench.interface import benchMarkRunner
13
13
  from vectordb_bench.metric import QURIES_PER_DOLLAR_METRIC
14
14
 
@@ -26,7 +26,7 @@ def main():
26
26
 
27
27
  # results selector
28
28
  resultSelectorContainer = st.sidebar.container()
29
- shownData, _, showCases = getshownData(allResults, resultSelectorContainer)
29
+ shownData, _, showCaseNames = getshownData(allResults, resultSelectorContainer)
30
30
 
31
31
  resultSelectorContainer.divider()
32
32
 
@@ -45,8 +45,8 @@ def main():
45
45
  priceMap = priceTable(priceTableContainer, shownData)
46
46
 
47
47
  # charts
48
- for case in showCases:
49
- data = [data for data in shownData if data["case_name"] == case.name]
48
+ for caseName in showCaseNames:
49
+ data = [data for data in shownData if data["case_name"] == caseName]
50
50
  dataWithMetric = []
51
51
  metric = QURIES_PER_DOLLAR_METRIC
52
52
  for d in data:
@@ -56,7 +56,7 @@ def main():
56
56
  d[metric] = d["qps"] / price * 3.6
57
57
  dataWithMetric.append(d)
58
58
  if len(dataWithMetric) > 0:
59
- chartContainer = st.expander(case.name, True)
59
+ chartContainer = st.expander(caseName, True)
60
60
  drawMetricChart(data, metric, chartContainer)
61
61
 
62
62
  # footer
@@ -5,6 +5,7 @@ from vectordb_bench.frontend.components.run_test.dbConfigSetting import dbConfig
5
5
  from vectordb_bench.frontend.components.run_test.dbSelector import dbSelector
6
6
  from vectordb_bench.frontend.components.run_test.generateTasks import generate_tasks
7
7
  from vectordb_bench.frontend.components.run_test.hideSidebar import hideSidebar
8
+ from vectordb_bench.frontend.components.run_test.initStyle import initStyle
8
9
  from vectordb_bench.frontend.components.run_test.submitTask import submitTask
9
10
  from vectordb_bench.frontend.components.check_results.nav import NavToResults
10
11
  from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
@@ -15,6 +16,9 @@ def main():
15
16
  # set page config
16
17
  initRunTestPageConfig(st)
17
18
 
19
+ # init style
20
+ initStyle(st)
21
+
18
22
  # header
19
23
  drawHeaderIcon(st)
20
24
 
@@ -0,0 +1,24 @@
1
+ import streamlit as st
2
+ from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
3
+ from vectordb_bench.frontend.components.tables.data import getNewResults
4
+ from vectordb_bench.frontend.config.styles import FAVICON
5
+
6
+
7
def main():
    """Render the results table page."""
    # set page config
    page_settings = {
        "page_title": "Table",
        "page_icon": FAVICON,
        "layout": "wide",
        # "initial_sidebar_state": "collapsed",
    }
    st.set_page_config(**page_settings)

    # header
    drawHeaderIcon(st)

    # Show the frame returned by getNewResults() at a fixed height.
    st.dataframe(getNewResults(), height=800)
21
+
22
+
23
+ if __name__ == "__main__":
24
+ main()
@@ -1,6 +1,22 @@
1
- from vectordb_bench.models import CaseType
1
+ import random
2
+ import string
3
+
2
4
 
3
5
  passwordKeys = ["password", "api_key"]
6
+
7
+
4
8
  def inputIsPassword(key: str) -> bool:
5
9
  return key.lower() in passwordKeys
6
10
 
11
+
12
def addHorizontalLine(st):
    """Emit a bordered, bottom-margined <div> through ``st.markdown``."""
    rule_html = "<div style='border: 1px solid #cccccc60; margin-bottom: 24px;'></div>"
    # The markup is raw HTML, so it is sent with unsafe_allow_html enabled.
    st.markdown(rule_html, unsafe_allow_html=True)
17
+
18
+
19
def generate_random_string(length):
    """Return ``length`` random characters drawn from ASCII letters and digits.

    Characters come from the ``random`` module, one ``random.choice`` call
    per character.
    """
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
@@ -6,7 +6,7 @@ from vectordb_bench.frontend.components.check_results.nav import NavToQuriesPerD
6
6
  from vectordb_bench.frontend.components.check_results.charts import drawCharts
7
7
  from vectordb_bench.frontend.components.check_results.filters import getshownData
8
8
  from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
9
- from vectordb_bench.frontend.const.styles import *
9
+ from vectordb_bench.frontend.config.styles import *
10
10
  from vectordb_bench.interface import benchMarkRunner
11
11
 
12
12
 
@@ -24,7 +24,7 @@ def main():
24
24
 
25
25
  # results selector and filter
26
26
  resultSelectorContainer = st.sidebar.container()
27
- shownData, failedTasks, showCases = getshownData(
27
+ shownData, failedTasks, showCaseNames = getshownData(
28
28
  allResults, resultSelectorContainer
29
29
  )
30
30
 
@@ -40,7 +40,7 @@ def main():
40
40
  getResults(resultesContainer, "vectordb_bench")
41
41
 
42
42
  # charts
43
- drawCharts(st, shownData, failedTasks, showCases)
43
+ drawCharts(st, shownData, failedTasks, showCaseNames)
44
44
 
45
45
  # footer
46
46
  footer(st.container())
@@ -1,38 +1,33 @@
1
- import traceback
1
+ import concurrent.futures
2
+ import logging
3
+ import multiprocessing as mp
2
4
  import pathlib
3
5
  import signal
4
- import logging
6
+ import traceback
5
7
  import uuid
6
- import concurrent
7
- import multiprocessing as mp
8
+ from enum import Enum
8
9
  from multiprocessing.connection import Connection
9
10
 
10
11
  import psutil
11
- from enum import Enum
12
12
 
13
13
  from . import config
14
- from .metric import Metric
15
- from .models import (
16
- TaskConfig,
17
- TestResult,
18
- CaseResult,
19
- LoadTimeoutError,
20
- PerformanceTimeoutError,
21
- ResultLabel,
22
- )
23
- from .backend.result_collector import ResultCollector
24
14
  from .backend.assembler import Assembler
25
- from .backend.task_runner import TaskRunner
26
15
  from .backend.data_source import DatasetSource
16
+ from .backend.result_collector import ResultCollector
17
+ from .backend.task_runner import TaskRunner
18
+ from .metric import Metric
19
+ from .models import (CaseResult, LoadTimeoutError, PerformanceTimeoutError,
20
+ ResultLabel, TaskConfig, TaskStage, TestResult)
27
21
 
28
22
  log = logging.getLogger(__name__)
29
23
 
30
24
  global_result_future: concurrent.futures.Future | None = None
31
25
 
26
+
32
27
  class SIGNAL(Enum):
33
- SUCCESS=0
34
- ERROR=1
35
- WIP=2
28
+ SUCCESS = 0
29
+ ERROR = 1
30
+ WIP = 2
36
31
 
37
32
 
38
33
  class BenchMarkRunner:
@@ -42,9 +37,11 @@ class BenchMarkRunner:
42
37
  self.drop_old: bool = True
43
38
  self.dataset_source: DatasetSource = DatasetSource.S3
44
39
 
40
+
45
41
  def set_drop_old(self, drop_old: bool):
46
42
  self.drop_old = drop_old
47
43
 
44
+
48
45
  def set_download_address(self, use_aliyun: bool):
49
46
  if use_aliyun:
50
47
  self.dataset_source = DatasetSource.AliyunOSS
@@ -152,13 +149,13 @@ class BenchMarkRunner:
152
149
  latest_runner, cached_load_duration = None, None
153
150
  for idx, runner in enumerate(running_task.case_runners):
154
151
  case_res = CaseResult(
155
- result_id=idx,
156
152
  metrics=Metric(),
157
153
  task_config=runner.config,
158
154
  )
159
155
 
160
156
  # drop_old = False if latest_runner and runner == latest_runner else config.DROP_OLD
161
- drop_old = config.DROP_OLD
157
+ # drop_old = config.DROP_OLD
158
+ drop_old = TaskStage.DROP_OLD in runner.config.stages
162
159
  if latest_runner and runner == latest_runner:
163
160
  drop_old = False
164
161
  elif not self.drop_old:
@@ -167,7 +164,7 @@ class BenchMarkRunner:
167
164
  log.info(f"[{idx+1}/{running_task.num_cases()}] start case: {runner.display()}, drop_old={drop_old}")
168
165
  case_res.metrics = runner.run(drop_old)
169
166
  log.info(f"[{idx+1}/{running_task.num_cases()}] finish case: {runner.display()}, "
170
- f"result={case_res.metrics}, label={case_res.label}")
167
+ f"result={case_res.metrics}, label={case_res.label}")
171
168
 
172
169
  # cache the latest succeeded runner
173
170
  latest_runner = runner
@@ -193,7 +190,6 @@ class BenchMarkRunner:
193
190
  c_results.append(case_res)
194
191
  send_conn.send((SIGNAL.WIP, idx))
195
192
 
196
-
197
193
  test_result = TestResult(
198
194
  run_id=running_task.run_id,
199
195
  task_label=running_task.task_label,
@@ -204,7 +200,7 @@ class BenchMarkRunner:
204
200
 
205
201
  send_conn.send((SIGNAL.SUCCESS, None))
206
202
  send_conn.close()
207
- log.info(f"Succes to finish task: label={running_task.task_label}, run_id={running_task.run_id}")
203
+ log.info(f"Success to finish task: label={running_task.task_label}, run_id={running_task.run_id}")
208
204
 
209
205
  except Exception as e:
210
206
  err_msg = f"An error occurs when running task={running_task.task_label}, run_id={running_task.run_id}, err={e}"
@@ -246,7 +242,7 @@ class BenchMarkRunner:
246
242
  called as soon as a child terminates.
247
243
  """
248
244
  children = psutil.Process().children(recursive=True)
249
- for p in children:
245
+ for p in children:
250
246
  try:
251
247
  log.warning(f"sending SIGTERM to child process: {p}")
252
248
  p.send_signal(sig)
vectordb_bench/metric.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import logging
2
2
  import numpy as np
3
3
 
4
- from dataclasses import dataclass
4
+ from dataclasses import dataclass, field
5
5
 
6
6
 
7
7
  log = logging.getLogger(__name__)
@@ -19,6 +19,10 @@ class Metric:
19
19
  qps: float = 0.0
20
20
  serial_latency_p99: float = 0.0
21
21
  recall: float = 0.0
22
+ ndcg: float = 0.0
23
+ conc_num_list: list[int] = field(default_factory=list)
24
+ conc_qps_list: list[float] = field(default_factory=list)
25
+ conc_latency_p99_list: list[float] = field(default_factory=list)
22
26
 
23
27
 
24
28
  QURIES_PER_DOLLAR_METRIC = "QP$ (Quries per Dollar)"
@@ -60,3 +64,21 @@ def calc_recall(count: int, ground_truth: list[int], got: list[int]) -> float:
60
64
  recalls[i] = 1
61
65
 
62
66
  return np.mean(recalls)
67
+
68
+
69
def get_ideal_dcg(k: int):
    """Return the sum of ``1 / log2(i + 2)`` for ``i`` in ``range(k)``.

    The sum is empty, and the result is 0, when ``k`` is 0 or negative.
    """
    # Built-in sum() starts at integer 0 and adds terms in order.
    return sum(1 / np.log2(rank + 2) for rank in range(k))
75
+
76
+
77
def calc_ndcg(ground_truth: list[int], got: list[int], ideal_dcg: float) -> float:
    """Return the discounted gain of ``got`` divided by ``ideal_dcg``.

    Each distinct id in ``got`` that also appears in ``ground_truth``
    contributes ``1 / log2(idx + 2)``, where ``idx`` is the position of its
    first occurrence in ``ground_truth``.

    Args:
        ground_truth: expected ids; position determines the gain.
        got: returned ids; duplicates are counted once.
        ideal_dcg: normalising constant.

    Returns:
        ``dcg / ideal_dcg``, or 0.0 when ``ideal_dcg`` is 0.
    """
    # Map each id to its first position once, instead of scanning the
    # list with `in` and `.index` for every returned id.
    first_position = {}
    for idx, truth_id in enumerate(ground_truth):
        first_position.setdefault(truth_id, idx)

    dcg = 0
    for got_id in set(got):
        idx = first_position.get(got_id)
        if idx is not None:
            dcg += 1 / np.log2(idx + 2)

    # Avoid dividing by zero when the normaliser is zero.
    if ideal_dcg == 0:
        return 0.0
    return dcg / ideal_dcg