vectordb-bench 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. vectordb_bench/__init__.py +1 -0
  2. vectordb_bench/backend/assembler.py +1 -1
  3. vectordb_bench/backend/cases.py +64 -18
  4. vectordb_bench/backend/clients/__init__.py +13 -0
  5. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +159 -0
  6. vectordb_bench/backend/clients/aws_opensearch/cli.py +44 -0
  7. vectordb_bench/backend/clients/aws_opensearch/config.py +58 -0
  8. vectordb_bench/backend/clients/aws_opensearch/run.py +125 -0
  9. vectordb_bench/backend/dataset.py +27 -5
  10. vectordb_bench/cli/vectordbbench.py +2 -0
  11. vectordb_bench/custom/custom_case.json +18 -0
  12. vectordb_bench/frontend/components/check_results/charts.py +6 -6
  13. vectordb_bench/frontend/components/check_results/data.py +12 -12
  14. vectordb_bench/frontend/components/check_results/expanderStyle.py +1 -1
  15. vectordb_bench/frontend/components/check_results/filters.py +20 -13
  16. vectordb_bench/frontend/components/check_results/headerIcon.py +1 -1
  17. vectordb_bench/frontend/components/check_results/priceTable.py +1 -1
  18. vectordb_bench/frontend/components/check_results/stPageConfig.py +1 -1
  19. vectordb_bench/frontend/components/concurrent/charts.py +26 -29
  20. vectordb_bench/frontend/components/custom/displayCustomCase.py +31 -0
  21. vectordb_bench/frontend/components/custom/displaypPrams.py +11 -0
  22. vectordb_bench/frontend/components/custom/getCustomConfig.py +40 -0
  23. vectordb_bench/frontend/components/custom/initStyle.py +15 -0
  24. vectordb_bench/frontend/components/run_test/autoRefresh.py +1 -1
  25. vectordb_bench/frontend/components/run_test/caseSelector.py +40 -28
  26. vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -5
  27. vectordb_bench/frontend/components/run_test/dbSelector.py +2 -14
  28. vectordb_bench/frontend/components/run_test/generateTasks.py +3 -5
  29. vectordb_bench/frontend/components/run_test/initStyle.py +14 -0
  30. vectordb_bench/frontend/components/run_test/submitTask.py +1 -1
  31. vectordb_bench/frontend/{const → config}/dbCaseConfigs.py +138 -31
  32. vectordb_bench/frontend/{const → config}/styles.py +2 -0
  33. vectordb_bench/frontend/pages/concurrent.py +11 -18
  34. vectordb_bench/frontend/pages/custom.py +64 -0
  35. vectordb_bench/frontend/pages/quries_per_dollar.py +5 -5
  36. vectordb_bench/frontend/pages/run_test.py +4 -0
  37. vectordb_bench/frontend/pages/tables.py +2 -2
  38. vectordb_bench/frontend/utils.py +17 -1
  39. vectordb_bench/frontend/vdb_benchmark.py +3 -3
  40. vectordb_bench/models.py +8 -4
  41. vectordb_bench/results/getLeaderboardData.py +1 -1
  42. {vectordb_bench-0.0.11.dist-info → vectordb_bench-0.0.12.dist-info}/METADATA +36 -13
  43. {vectordb_bench-0.0.11.dist-info → vectordb_bench-0.0.12.dist-info}/RECORD +48 -37
  44. {vectordb_bench-0.0.11.dist-info → vectordb_bench-0.0.12.dist-info}/WHEEL +1 -1
  45. /vectordb_bench/frontend/{const → config}/dbPrices.py +0 -0
  46. {vectordb_bench-0.0.11.dist-info → vectordb_bench-0.0.12.dist-info}/LICENSE +0 -0
  47. {vectordb_bench-0.0.11.dist-info → vectordb_bench-0.0.12.dist-info}/entry_points.txt +0 -0
  48. {vectordb_bench-0.0.11.dist-info → vectordb_bench-0.0.12.dist-info}/top_level.txt +0 -0
@@ -1,43 +1,147 @@
1
- from enum import IntEnum
1
+ from enum import IntEnum, Enum
2
2
  import typing
3
3
  from pydantic import BaseModel
4
4
  from vectordb_bench.backend.cases import CaseLabel, CaseType
5
5
  from vectordb_bench.backend.clients import DB
6
6
  from vectordb_bench.backend.clients.api import IndexType
7
+ from vectordb_bench.frontend.components.custom.getCustomConfig import get_custom_configs
7
8
 
8
- from vectordb_bench.models import CaseConfigParamType
9
+ from vectordb_bench.models import CaseConfig, CaseConfigParamType
9
10
 
10
11
  MAX_STREAMLIT_INT = (1 << 53) - 1
11
12
 
12
13
  DB_LIST = [d for d in DB if d != DB.Test]
13
14
 
14
- DIVIDER = "DIVIDER"
15
- CASE_LIST_WITH_DIVIDER = [
15
+
16
+ class Delimiter(Enum):
17
+ Line = "line"
18
+
19
+
20
+ class BatchCaseConfig(BaseModel):
21
+ label: str = ""
22
+ description: str = ""
23
+ cases: list[CaseConfig] = []
24
+
25
+
26
+ class UICaseItem(BaseModel):
27
+ isLine: bool = False
28
+ label: str = ""
29
+ description: str = ""
30
+ cases: list[CaseConfig] = []
31
+ caseLabel: CaseLabel = CaseLabel.Performance
32
+
33
+ def __init__(
34
+ self,
35
+ isLine: bool = False,
36
+ case_id: CaseType = None,
37
+ custom_case: dict = {},
38
+ cases: list[CaseConfig] = [],
39
+ label: str = "",
40
+ description: str = "",
41
+ caseLabel: CaseLabel = CaseLabel.Performance,
42
+ ):
43
+ if isLine is True:
44
+ super().__init__(isLine=True)
45
+ elif case_id is not None and isinstance(case_id, CaseType):
46
+ c = case_id.case_cls(custom_case)
47
+ super().__init__(
48
+ label=c.name,
49
+ description=c.description,
50
+ cases=[CaseConfig(case_id=case_id, custom_case=custom_case)],
51
+ caseLabel=c.label,
52
+ )
53
+ else:
54
+ super().__init__(
55
+ label=label,
56
+ description=description,
57
+ cases=cases,
58
+ caseLabel=caseLabel,
59
+ )
60
+
61
+ def __hash__(self) -> int:
62
+ return hash(self.json())
63
+
64
+
65
+ class UICaseItemCluster(BaseModel):
66
+ label: str = ""
67
+ uiCaseItems: list[UICaseItem] = []
68
+
69
+
70
+ def get_custom_case_items() -> list[UICaseItem]:
71
+ custom_configs = get_custom_configs()
72
+ return [
73
+ UICaseItem(
74
+ case_id=CaseType.PerformanceCustomDataset, custom_case=custom_config.dict()
75
+ )
76
+ for custom_config in custom_configs
77
+ ]
78
+
79
+
80
+ def get_custom_case_cluter() -> UICaseItemCluster:
81
+ return UICaseItemCluster(
82
+ label="Custom Search Performance Test", uiCaseItems=get_custom_case_items()
83
+ )
84
+
85
+
86
+ UI_CASE_CLUSTERS: list[UICaseItemCluster] = [
87
+ UICaseItemCluster(
88
+ label="Search Performance Test",
89
+ uiCaseItems=[
90
+ UICaseItem(case_id=CaseType.Performance768D100M),
91
+ UICaseItem(case_id=CaseType.Performance768D10M),
92
+ UICaseItem(case_id=CaseType.Performance768D1M),
93
+ UICaseItem(isLine=True),
94
+ UICaseItem(case_id=CaseType.Performance1536D5M),
95
+ UICaseItem(case_id=CaseType.Performance1536D500K),
96
+ UICaseItem(case_id=CaseType.Performance1536D50K),
97
+ ],
98
+ ),
99
+ UICaseItemCluster(
100
+ label="Filter Search Performance Test",
101
+ uiCaseItems=[
102
+ UICaseItem(case_id=CaseType.Performance768D10M1P),
103
+ UICaseItem(case_id=CaseType.Performance768D10M99P),
104
+ UICaseItem(case_id=CaseType.Performance768D1M1P),
105
+ UICaseItem(case_id=CaseType.Performance768D1M99P),
106
+ UICaseItem(isLine=True),
107
+ UICaseItem(case_id=CaseType.Performance1536D5M1P),
108
+ UICaseItem(case_id=CaseType.Performance1536D5M99P),
109
+ UICaseItem(case_id=CaseType.Performance1536D500K1P),
110
+ UICaseItem(case_id=CaseType.Performance1536D500K99P),
111
+ ],
112
+ ),
113
+ UICaseItemCluster(
114
+ label="Capacity Test",
115
+ uiCaseItems=[
116
+ UICaseItem(case_id=CaseType.CapacityDim960),
117
+ UICaseItem(case_id=CaseType.CapacityDim128),
118
+ ],
119
+ ),
120
+ ]
121
+
122
+ # DIVIDER = "DIVIDER"
123
+ DISPLAY_CASE_ORDER: list[CaseType] = [
16
124
  CaseType.Performance768D100M,
17
125
  CaseType.Performance768D10M,
18
126
  CaseType.Performance768D1M,
19
- DIVIDER,
20
127
  CaseType.Performance1536D5M,
21
128
  CaseType.Performance1536D500K,
22
129
  CaseType.Performance1536D50K,
23
- DIVIDER,
24
130
  CaseType.Performance768D10M1P,
25
131
  CaseType.Performance768D1M1P,
26
- DIVIDER,
27
132
  CaseType.Performance1536D5M1P,
28
133
  CaseType.Performance1536D500K1P,
29
- DIVIDER,
30
134
  CaseType.Performance768D10M99P,
31
135
  CaseType.Performance768D1M99P,
32
- DIVIDER,
33
136
  CaseType.Performance1536D5M99P,
34
137
  CaseType.Performance1536D500K99P,
35
- DIVIDER,
36
138
  CaseType.CapacityDim960,
37
139
  CaseType.CapacityDim128,
38
140
  ]
141
+ CASE_NAME_ORDER = [case.case_cls().name for case in DISPLAY_CASE_ORDER]
39
142
 
40
- CASE_LIST = [item for item in CASE_LIST_WITH_DIVIDER if isinstance(item, CaseType)]
143
+ # CASE_LIST = [
144
+ # item for item in CASE_LIST_WITH_DIVIDER if isinstance(item, CaseType)]
41
145
 
42
146
 
43
147
  class InputType(IntEnum):
@@ -53,7 +157,7 @@ class CaseConfigInput(BaseModel):
53
157
  inputHelp: str = ""
54
158
  displayLabel: str = ""
55
159
  # todo type should be a function
56
- isDisplayed: typing.Any = lambda x: True
160
+ isDisplayed: typing.Any = lambda config: True
57
161
 
58
162
 
59
163
  CaseConfigParamInput_IndexType = CaseConfigInput(
@@ -146,7 +250,7 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
146
250
  CaseConfigParamInput_maintenance_work_mem_PgVector = CaseConfigInput(
147
251
  label=CaseConfigParamType.maintenance_work_mem,
148
252
  inputHelp="Recommended value: 1.33x the index size, not to exceed the available free memory."
149
- "Specify in gigabytes. e.g. 8GB",
253
+ "Specify in gigabytes. e.g. 8GB",
150
254
  inputType=InputType.Text,
151
255
  inputConfig={
152
256
  "value": "8GB",
@@ -157,7 +261,7 @@ CaseConfigParamInput_max_parallel_workers_PgVector = CaseConfigInput(
157
261
  label=CaseConfigParamType.max_parallel_workers,
158
262
  displayLabel="Max parallel workers",
159
263
  inputHelp="Recommended value: (cpu cores - 1). This will set the parameters: max_parallel_maintenance_workers,"
160
- " max_parallel_workers & table(parallel_workers)",
264
+ " max_parallel_workers & table(parallel_workers)",
161
265
  inputType=InputType.Number,
162
266
  inputConfig={
163
267
  "min": 0,
@@ -514,7 +618,8 @@ CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
514
618
  "options": ["x4", "x8", "x16", "x32", "x64"],
515
619
  },
516
620
  isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
517
- == "product" and config.get(CaseConfigParamType.IndexType, None)
621
+ == "product"
622
+ and config.get(CaseConfigParamType.IndexType, None)
518
623
  in [
519
624
  IndexType.HNSW.value,
520
625
  IndexType.IVFFlat.value,
@@ -582,22 +687,24 @@ ESPerformanceConfig = [
582
687
  CaseConfigParamInput_NumCandidates_ES,
583
688
  ]
584
689
 
585
- PgVectorLoadingConfig = [CaseConfigParamInput_IndexType_PgVector,
586
- CaseConfigParamInput_Lists_PgVector,
587
- CaseConfigParamInput_m,
588
- CaseConfigParamInput_EFConstruction_PgVector,
589
- CaseConfigParamInput_maintenance_work_mem_PgVector,
590
- CaseConfigParamInput_max_parallel_workers_PgVector,
591
- ]
592
- PgVectorPerformanceConfig = [CaseConfigParamInput_IndexType_PgVector,
593
- CaseConfigParamInput_m,
594
- CaseConfigParamInput_EFConstruction_PgVector,
595
- CaseConfigParamInput_EFSearch_PgVector,
596
- CaseConfigParamInput_Lists_PgVector,
597
- CaseConfigParamInput_Probes_PgVector,
598
- CaseConfigParamInput_maintenance_work_mem_PgVector,
599
- CaseConfigParamInput_max_parallel_workers_PgVector,
600
- ]
690
+ PgVectorLoadingConfig = [
691
+ CaseConfigParamInput_IndexType_PgVector,
692
+ CaseConfigParamInput_Lists_PgVector,
693
+ CaseConfigParamInput_m,
694
+ CaseConfigParamInput_EFConstruction_PgVector,
695
+ CaseConfigParamInput_maintenance_work_mem_PgVector,
696
+ CaseConfigParamInput_max_parallel_workers_PgVector,
697
+ ]
698
+ PgVectorPerformanceConfig = [
699
+ CaseConfigParamInput_IndexType_PgVector,
700
+ CaseConfigParamInput_m,
701
+ CaseConfigParamInput_EFConstruction_PgVector,
702
+ CaseConfigParamInput_EFSearch_PgVector,
703
+ CaseConfigParamInput_Lists_PgVector,
704
+ CaseConfigParamInput_Probes_PgVector,
705
+ CaseConfigParamInput_maintenance_work_mem_PgVector,
706
+ CaseConfigParamInput_max_parallel_workers_PgVector,
707
+ ]
601
708
 
602
709
  PgVectoRSLoadingConfig = [
603
710
  CaseConfigParamInput_IndexType,
@@ -46,6 +46,7 @@ DB_TO_ICON = {
46
46
  DB.PgVectoRS: "https://assets.zilliz.com/PG_Vector_d464f2ef5f.png",
47
47
  DB.Redis: "https://assets.zilliz.com/Redis_Cloud_74b8bfef39.png",
48
48
  DB.Chroma: "https://assets.zilliz.com/chroma_ceb3f06ed7.png",
49
+ DB.AWSOpenSearch: "https://assets.zilliz.com/opensearch_1eee37584e.jpeg",
49
50
  }
50
51
 
51
52
  # RedisCloud color: #0D6EFD
@@ -59,4 +60,5 @@ COLOR_MAP = {
59
60
  DB.WeaviateCloud.value: "#20C997",
60
61
  DB.PgVector.value: "#4C779A",
61
62
  DB.Redis.value: "#0D6EFD",
63
+ DB.AWSOpenSearch.value: "#0DCAF0",
62
64
  }
@@ -1,18 +1,14 @@
1
-
2
-
3
-
4
1
  import streamlit as st
5
- from vectordb_bench.backend.cases import CaseType
6
2
  from vectordb_bench.frontend.components.check_results.footer import footer
7
- from vectordb_bench.frontend.components.check_results.expanderStyle import initMainExpanderStyle
8
- from vectordb_bench.frontend.components.check_results.priceTable import priceTable
9
3
  from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
10
- from vectordb_bench.frontend.components.check_results.nav import NavToResults, NavToRunTest
11
- from vectordb_bench.frontend.components.check_results.charts import drawMetricChart
4
+ from vectordb_bench.frontend.components.check_results.nav import (
5
+ NavToResults,
6
+ NavToRunTest,
7
+ )
12
8
  from vectordb_bench.frontend.components.check_results.filters import getshownData
13
9
  from vectordb_bench.frontend.components.concurrent.charts import drawChartsByCase
14
10
  from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
15
- from vectordb_bench.frontend.const.styles import *
11
+ from vectordb_bench.frontend.config.styles import FAVICON
16
12
  from vectordb_bench.interface import benchMarkRunner
17
13
  from vectordb_bench.models import TestResult
18
14
 
@@ -30,26 +26,23 @@ def main():
30
26
  drawHeaderIcon(st)
31
27
 
32
28
  allResults = benchMarkRunner.get_results()
33
-
29
+
34
30
  def check_conc_data(res: TestResult):
35
31
  case_results = res.results
36
32
  count = 0
37
33
  for case_result in case_results:
38
34
  if len(case_result.metrics.conc_num_list) > 0:
39
35
  count += 1
40
-
36
+
41
37
  return count > 0
42
-
38
+
43
39
  checkedResults = [res for res in allResults if check_conc_data(res)]
44
-
45
40
 
46
41
  st.title("VectorDB Benchmark (Concurrent Performance)")
47
42
 
48
43
  # results selector
49
44
  resultSelectorContainer = st.sidebar.container()
50
- shownData, _, showCases = getshownData(
51
- checkedResults, resultSelectorContainer)
52
-
45
+ shownData, _, showCaseNames = getshownData(checkedResults, resultSelectorContainer)
53
46
 
54
47
  resultSelectorContainer.divider()
55
48
 
@@ -61,8 +54,8 @@ def main():
61
54
  # save or share
62
55
  resultesContainer = st.sidebar.container()
63
56
  getResults(resultesContainer, "vectordb_bench_concurrent")
64
-
65
- drawChartsByCase(shownData, showCases, st.container())
57
+
58
+ drawChartsByCase(shownData, showCaseNames, st.container())
66
59
 
67
60
  # footer
68
61
  footer(st.container())
@@ -0,0 +1,64 @@
1
+ import streamlit as st
2
+ from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
3
+ from vectordb_bench.frontend.components.custom.displayCustomCase import displayCustomCase
4
+ from vectordb_bench.frontend.components.custom.displaypPrams import displayParams
5
+ from vectordb_bench.frontend.components.custom.getCustomConfig import CustomCaseConfig, generate_custom_case, get_custom_configs, save_custom_configs
6
+ from vectordb_bench.frontend.components.custom.initStyle import initStyle
7
+ from vectordb_bench.frontend.config.styles import FAVICON, PAGE_TITLE
8
+
9
+
10
+ class CustomCaseManager():
11
+ customCaseItems: list[CustomCaseConfig]
12
+
13
+ def __init__(self):
14
+ self.customCaseItems = get_custom_configs()
15
+
16
+ def addCase(self):
17
+ new_custom_case = generate_custom_case()
18
+ new_custom_case.dataset_config.name = f"{new_custom_case.dataset_config.name} {len(self.customCaseItems)}"
19
+ self.customCaseItems += [new_custom_case]
20
+ self.save()
21
+
22
+ def deleteCase(self, idx: int):
23
+ self.customCaseItems.pop(idx)
24
+ self.save()
25
+
26
+ def save(self):
27
+ save_custom_configs(self.customCaseItems)
28
+
29
+
30
+ def main():
31
+ st.set_page_config(
32
+ page_title=PAGE_TITLE,
33
+ page_icon=FAVICON,
34
+ # layout="wide",
35
+ # initial_sidebar_state="collapsed",
36
+ )
37
+
38
+ # header
39
+ drawHeaderIcon(st)
40
+
41
+ # init style
42
+ initStyle(st)
43
+
44
+ st.title("Custom Dataset")
45
+ displayParams(st)
46
+ customCaseManager = CustomCaseManager()
47
+
48
+ for idx, customCase in enumerate(customCaseManager.customCaseItems):
49
+ expander = st.expander(customCase.dataset_config.name, expanded=True)
50
+ key = f"custom_case_{idx}"
51
+ displayCustomCase(customCase, expander, key=key)
52
+
53
+ columns = expander.columns(8)
54
+ columns[0].button(
55
+ "Save", key=f"{key}_", type="secondary", on_click=lambda: customCaseManager.save())
56
+ columns[1].button(":red[Delete]", key=f"{key}_delete", type="secondary",
57
+ on_click=lambda: customCaseManager.deleteCase(idx))
58
+
59
+ st.button("\+ New Dataset", key=f"add_custom_configs",
60
+ type="primary", on_click=lambda: customCaseManager.addCase())
61
+
62
+
63
+ if __name__ == "__main__":
64
+ main()
@@ -8,7 +8,7 @@ from vectordb_bench.frontend.components.check_results.nav import NavToResults, N
8
8
  from vectordb_bench.frontend.components.check_results.charts import drawMetricChart
9
9
  from vectordb_bench.frontend.components.check_results.filters import getshownData
10
10
  from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
11
- from vectordb_bench.frontend.const.styles import *
11
+ from vectordb_bench.frontend.config.styles import *
12
12
  from vectordb_bench.interface import benchMarkRunner
13
13
  from vectordb_bench.metric import QURIES_PER_DOLLAR_METRIC
14
14
 
@@ -26,7 +26,7 @@ def main():
26
26
 
27
27
  # results selector
28
28
  resultSelectorContainer = st.sidebar.container()
29
- shownData, _, showCases = getshownData(allResults, resultSelectorContainer)
29
+ shownData, _, showCaseNames = getshownData(allResults, resultSelectorContainer)
30
30
 
31
31
  resultSelectorContainer.divider()
32
32
 
@@ -45,8 +45,8 @@ def main():
45
45
  priceMap = priceTable(priceTableContainer, shownData)
46
46
 
47
47
  # charts
48
- for case in showCases:
49
- data = [data for data in shownData if data["case_name"] == case.name]
48
+ for caseName in showCaseNames:
49
+ data = [data for data in shownData if data["case_name"] == caseName]
50
50
  dataWithMetric = []
51
51
  metric = QURIES_PER_DOLLAR_METRIC
52
52
  for d in data:
@@ -56,7 +56,7 @@ def main():
56
56
  d[metric] = d["qps"] / price * 3.6
57
57
  dataWithMetric.append(d)
58
58
  if len(dataWithMetric) > 0:
59
- chartContainer = st.expander(case.name, True)
59
+ chartContainer = st.expander(caseName, True)
60
60
  drawMetricChart(data, metric, chartContainer)
61
61
 
62
62
  # footer
@@ -5,6 +5,7 @@ from vectordb_bench.frontend.components.run_test.dbConfigSetting import dbConfig
5
5
  from vectordb_bench.frontend.components.run_test.dbSelector import dbSelector
6
6
  from vectordb_bench.frontend.components.run_test.generateTasks import generate_tasks
7
7
  from vectordb_bench.frontend.components.run_test.hideSidebar import hideSidebar
8
+ from vectordb_bench.frontend.components.run_test.initStyle import initStyle
8
9
  from vectordb_bench.frontend.components.run_test.submitTask import submitTask
9
10
  from vectordb_bench.frontend.components.check_results.nav import NavToResults
10
11
  from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
@@ -15,6 +16,9 @@ def main():
15
16
  # set page config
16
17
  initRunTestPageConfig(st)
17
18
 
19
+ # init style
20
+ initStyle(st)
21
+
18
22
  # header
19
23
  drawHeaderIcon(st)
20
24
 
@@ -1,7 +1,7 @@
1
1
  import streamlit as st
2
2
  from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
3
3
  from vectordb_bench.frontend.components.tables.data import getNewResults
4
- from vectordb_bench.frontend.const.styles import FAVICON
4
+ from vectordb_bench.frontend.config.styles import FAVICON
5
5
 
6
6
 
7
7
  def main():
@@ -21,4 +21,4 @@ def main():
21
21
 
22
22
 
23
23
  if __name__ == "__main__":
24
- main()
24
+ main()
@@ -1,6 +1,22 @@
1
- from vectordb_bench.models import CaseType
1
+ import random
2
+ import string
3
+
2
4
 
3
5
  passwordKeys = ["password", "api_key"]
6
+
7
+
4
8
  def inputIsPassword(key: str) -> bool:
5
9
  return key.lower() in passwordKeys
6
10
 
11
+
12
+ def addHorizontalLine(st):
13
+ st.markdown(
14
+ "<div style='border: 1px solid #cccccc60; margin-bottom: 24px;'></div>",
15
+ unsafe_allow_html=True,
16
+ )
17
+
18
+
19
+ def generate_random_string(length):
20
+ letters = string.ascii_letters + string.digits
21
+ result = ''.join(random.choice(letters) for _ in range(length))
22
+ return result
@@ -6,7 +6,7 @@ from vectordb_bench.frontend.components.check_results.nav import NavToQuriesPerD
6
6
  from vectordb_bench.frontend.components.check_results.charts import drawCharts
7
7
  from vectordb_bench.frontend.components.check_results.filters import getshownData
8
8
  from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
9
- from vectordb_bench.frontend.const.styles import *
9
+ from vectordb_bench.frontend.config.styles import *
10
10
  from vectordb_bench.interface import benchMarkRunner
11
11
 
12
12
 
@@ -24,7 +24,7 @@ def main():
24
24
 
25
25
  # results selector and filter
26
26
  resultSelectorContainer = st.sidebar.container()
27
- shownData, failedTasks, showCases = getshownData(
27
+ shownData, failedTasks, showCaseNames = getshownData(
28
28
  allResults, resultSelectorContainer
29
29
  )
30
30
 
@@ -40,7 +40,7 @@ def main():
40
40
  getResults(resultesContainer, "vectordb_bench")
41
41
 
42
42
  # charts
43
- drawCharts(st, shownData, failedTasks, showCases)
43
+ drawCharts(st, shownData, failedTasks, showCaseNames)
44
44
 
45
45
  # footer
46
46
  footer(st.container())
vectordb_bench/models.py CHANGED
@@ -94,6 +94,10 @@ class CaseConfig(BaseModel):
94
94
  self._k = value
95
95
  '''
96
96
 
97
+ def __hash__(self) -> int:
98
+ return hash(self.json())
99
+
100
+
97
101
  class TaskStage(StrEnum):
98
102
  """Enumerations of various stages of the task"""
99
103
 
@@ -250,18 +254,18 @@ class TestResult(BaseModel):
250
254
 
251
255
  max_db = max(map(len, [f.task_config.db.name for f in filtered_results]))
252
256
  max_db_labels = (
253
- max(map(len, [f.task_config.db_config.db_label for f in filtered_results]))
254
- + 3
257
+ max(map(len, [f.task_config.db_config.db_label for f in filtered_results]))
258
+ + 3
255
259
  )
256
260
  max_case = max(
257
261
  map(len, [f.task_config.case_config.case_id.name for f in filtered_results])
258
262
  )
259
263
  max_load_dur = (
260
- max(map(len, [str(f.metrics.load_duration) for f in filtered_results])) + 3
264
+ max(map(len, [str(f.metrics.load_duration) for f in filtered_results])) + 3
261
265
  )
262
266
  max_qps = max(map(len, [str(f.metrics.qps) for f in filtered_results])) + 3
263
267
  max_recall = (
264
- max(map(len, [str(f.metrics.recall) for f in filtered_results])) + 3
268
+ max(map(len, [str(f.metrics.recall) for f in filtered_results])) + 3
265
269
  )
266
270
 
267
271
  max_db_labels = 8 if max_db_labels < 8 else max_db_labels
@@ -2,7 +2,7 @@ from vectordb_bench import config
2
2
  import ujson
3
3
  import pathlib
4
4
  from vectordb_bench.backend.cases import CaseType
5
- from vectordb_bench.frontend.const.dbPrices import DB_DBLABEL_TO_PRICE
5
+ from vectordb_bench.frontend.config.dbPrices import DB_DBLABEL_TO_PRICE
6
6
  from vectordb_bench.interface import benchMarkRunner
7
7
  from vectordb_bench.models import CaseResult, ResultLabel, TestResult
8
8
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vectordb-bench
3
- Version: 0.0.11
3
+ Version: 0.0.12
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -39,6 +39,10 @@ Requires-Dist: chromadb ; extra == 'all'
39
39
  Requires-Dist: psycopg2 ; extra == 'all'
40
40
  Requires-Dist: psycopg ; extra == 'all'
41
41
  Requires-Dist: psycopg-binary ; extra == 'all'
42
+ Requires-Dist: opensearch-dsl ==2.1.0 ; extra == 'all'
43
+ Requires-Dist: opensearch-py ==2.6.0 ; extra == 'all'
44
+ Provides-Extra: awsopensearch
45
+ Requires-Dist: awsopensearch ; extra == 'awsopensearch'
42
46
  Provides-Extra: chromadb
43
47
  Requires-Dist: chromadb ; extra == 'chromadb'
44
48
  Provides-Extra: elastic
@@ -91,18 +95,19 @@ pip install vectordb-bench[pinecone]
91
95
  ```
92
96
  All supported database clients
93
97
 
94
- |Optional database client|install command|
95
- |---------------|---------------|
96
- |pymilvus(*default*)|`pip install vectordb-bench`|
97
- |all|`pip install vectordb-bench[all]`|
98
- |qdrant|`pip install vectordb-bench[qdrant]`|
99
- |pinecone|`pip install vectordb-bench[pinecone]`|
100
- |weaviate|`pip install vectordb-bench[weaviate]`|
101
- |elastic|`pip install vectordb-bench[elastic]`|
102
- |pgvector|`pip install vectordb-bench[pgvector]`|
103
- |pgvecto.rs|`pip install vectordb-bench[pgvecto_rs]`|
104
- |redis|`pip install vectordb-bench[redis]`|
105
- |chromadb|`pip install vectordb-bench[chromadb]`|
98
+ | Optional database client | install command |
99
+ |--------------------------|---------------------------------------------|
100
+ | pymilvus(*default*) | `pip install vectordb-bench` |
101
+ | all | `pip install vectordb-bench[all]` |
102
+ | qdrant | `pip install vectordb-bench[qdrant]` |
103
+ | pinecone | `pip install vectordb-bench[pinecone]` |
104
+ | weaviate | `pip install vectordb-bench[weaviate]` |
105
+ | elastic | `pip install vectordb-bench[elastic]` |
106
+ | pgvector | `pip install vectordb-bench[pgvector]` |
107
+ | pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
108
+ | redis | `pip install vectordb-bench[redis]` |
109
+ | chromadb | `pip install vectordb-bench[chromadb]` |
110
+ | awsopensearch | `pip install vectordb-bench[awsopensearch]` |
106
111
 
107
112
  ### Run
108
113
 
@@ -345,6 +350,24 @@ Case No. | Case Type | Dataset Size | Filtering Rate | Results |
345
350
 
346
351
  Each case provides an in-depth examination of a vector database's abilities, giving you a comprehensive view of the database's performance.
347
352
 
353
+ #### Custom Dataset for Performance case
354
+
355
+ Through the `/custom` page, users can customize their own performance case using local datasets. After saving, the corresponding case can be selected from the `/run_test` page to perform the test.
356
+
357
+ ![image](fig/custom_dataset.png)
358
+ ![image](fig/custom_case_run_test.png)
359
+
360
+ We have strict requirements for the dataset format; please follow them.
361
+ - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
362
+ - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
363
+ - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
364
+ - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
365
+
366
+ - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
367
+
368
+ - `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
369
+
370
+
348
371
  ## Goals
349
372
  Our goals of this benchmark are:
350
373
  ### Reproducibility & Usability