vectordb-bench 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. vectordb_bench/__init__.py +49 -24
  2. vectordb_bench/__main__.py +4 -3
  3. vectordb_bench/backend/assembler.py +12 -13
  4. vectordb_bench/backend/cases.py +56 -46
  5. vectordb_bench/backend/clients/__init__.py +101 -14
  6. vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +26 -0
  7. vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +18 -0
  8. vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +345 -0
  9. vectordb_bench/backend/clients/aliyun_opensearch/config.py +47 -0
  10. vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
  11. vectordb_bench/backend/clients/alloydb/cli.py +52 -35
  12. vectordb_bench/backend/clients/alloydb/config.py +30 -30
  13. vectordb_bench/backend/clients/api.py +8 -9
  14. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
  15. vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
  16. vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
  17. vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
  18. vectordb_bench/backend/clients/chroma/chroma.py +38 -36
  19. vectordb_bench/backend/clients/chroma/config.py +4 -2
  20. vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
  21. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
  22. vectordb_bench/backend/clients/memorydb/cli.py +8 -8
  23. vectordb_bench/backend/clients/memorydb/config.py +2 -2
  24. vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
  25. vectordb_bench/backend/clients/milvus/cli.py +62 -80
  26. vectordb_bench/backend/clients/milvus/config.py +31 -7
  27. vectordb_bench/backend/clients/milvus/milvus.py +23 -26
  28. vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
  29. vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
  30. vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
  31. vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
  32. vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
  33. vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
  34. vectordb_bench/backend/clients/pgvector/cli.py +40 -31
  35. vectordb_bench/backend/clients/pgvector/config.py +63 -73
  36. vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
  37. vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
  38. vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
  39. vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
  40. vectordb_bench/backend/clients/pinecone/config.py +1 -0
  41. vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
  42. vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
  43. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
  44. vectordb_bench/backend/clients/redis/cli.py +6 -12
  45. vectordb_bench/backend/clients/redis/config.py +7 -5
  46. vectordb_bench/backend/clients/redis/redis.py +94 -58
  47. vectordb_bench/backend/clients/test/cli.py +1 -2
  48. vectordb_bench/backend/clients/test/config.py +2 -2
  49. vectordb_bench/backend/clients/test/test.py +4 -5
  50. vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
  51. vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
  52. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
  53. vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
  54. vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
  55. vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
  56. vectordb_bench/backend/data_source.py +30 -18
  57. vectordb_bench/backend/dataset.py +47 -27
  58. vectordb_bench/backend/result_collector.py +2 -3
  59. vectordb_bench/backend/runner/__init__.py +4 -6
  60. vectordb_bench/backend/runner/mp_runner.py +85 -34
  61. vectordb_bench/backend/runner/rate_runner.py +51 -23
  62. vectordb_bench/backend/runner/read_write_runner.py +140 -46
  63. vectordb_bench/backend/runner/serial_runner.py +99 -50
  64. vectordb_bench/backend/runner/util.py +4 -19
  65. vectordb_bench/backend/task_runner.py +95 -74
  66. vectordb_bench/backend/utils.py +17 -9
  67. vectordb_bench/base.py +0 -1
  68. vectordb_bench/cli/cli.py +65 -60
  69. vectordb_bench/cli/vectordbbench.py +6 -7
  70. vectordb_bench/frontend/components/check_results/charts.py +8 -19
  71. vectordb_bench/frontend/components/check_results/data.py +4 -16
  72. vectordb_bench/frontend/components/check_results/filters.py +8 -16
  73. vectordb_bench/frontend/components/check_results/nav.py +4 -4
  74. vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
  75. vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
  76. vectordb_bench/frontend/components/concurrent/charts.py +12 -12
  77. vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
  78. vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
  79. vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
  80. vectordb_bench/frontend/components/custom/initStyle.py +1 -1
  81. vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
  82. vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
  83. vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
  84. vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
  85. vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
  86. vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
  87. vectordb_bench/frontend/components/tables/data.py +3 -6
  88. vectordb_bench/frontend/config/dbCaseConfigs.py +108 -83
  89. vectordb_bench/frontend/pages/concurrent.py +3 -5
  90. vectordb_bench/frontend/pages/custom.py +30 -9
  91. vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
  92. vectordb_bench/frontend/pages/run_test.py +3 -7
  93. vectordb_bench/frontend/utils.py +1 -1
  94. vectordb_bench/frontend/vdb_benchmark.py +4 -6
  95. vectordb_bench/interface.py +56 -26
  96. vectordb_bench/log_util.py +59 -64
  97. vectordb_bench/metric.py +10 -11
  98. vectordb_bench/models.py +26 -43
  99. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/METADATA +34 -42
  100. vectordb_bench-0.0.20.dist-info/RECORD +135 -0
  101. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/WHEEL +1 -1
  102. vectordb_bench-0.0.18.dist-info/RECORD +0 -131
  103. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/LICENSE +0 -0
  104. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/entry_points.txt +0 -0
  105. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/top_level.txt +0 -0
@@ -22,46 +22,71 @@ class config:
22
22
  DROP_OLD = env.bool("DROP_OLD", True)
23
23
  USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)
24
24
 
25
- NUM_CONCURRENCY = env.list("NUM_CONCURRENCY", [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100], subcast=int )
25
+ NUM_CONCURRENCY = env.list(
26
+ "NUM_CONCURRENCY",
27
+ [
28
+ 1,
29
+ 5,
30
+ 10,
31
+ 15,
32
+ 20,
33
+ 25,
34
+ 30,
35
+ 35,
36
+ 40,
37
+ 45,
38
+ 50,
39
+ 55,
40
+ 60,
41
+ 65,
42
+ 70,
43
+ 75,
44
+ 80,
45
+ 85,
46
+ 90,
47
+ 95,
48
+ 100,
49
+ ],
50
+ subcast=int,
51
+ )
26
52
 
27
53
  CONCURRENCY_DURATION = 30
28
54
 
29
55
  RESULTS_LOCAL_DIR = env.path(
30
- "RESULTS_LOCAL_DIR", pathlib.Path(__file__).parent.joinpath("results")
56
+ "RESULTS_LOCAL_DIR",
57
+ pathlib.Path(__file__).parent.joinpath("results"),
31
58
  )
32
59
  CONFIG_LOCAL_DIR = env.path(
33
- "CONFIG_LOCAL_DIR", pathlib.Path(__file__).parent.joinpath("config-files")
60
+ "CONFIG_LOCAL_DIR",
61
+ pathlib.Path(__file__).parent.joinpath("config-files"),
34
62
  )
35
63
 
36
-
37
64
  K_DEFAULT = 100 # default return top k nearest neighbors during search
38
65
  CUSTOM_CONFIG_DIR = pathlib.Path(__file__).parent.joinpath("custom/custom_case.json")
39
66
 
40
- CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
41
- LOAD_TIMEOUT_DEFAULT = 24 * 3600 # 24h
42
- LOAD_TIMEOUT_768D_1M = 24 * 3600 # 24h
43
- LOAD_TIMEOUT_768D_10M = 240 * 3600 # 10d
44
- LOAD_TIMEOUT_768D_100M = 2400 * 3600 # 100d
67
+ CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
68
+ LOAD_TIMEOUT_DEFAULT = 24 * 3600 # 24h
69
+ LOAD_TIMEOUT_768D_1M = 24 * 3600 # 24h
70
+ LOAD_TIMEOUT_768D_10M = 240 * 3600 # 10d
71
+ LOAD_TIMEOUT_768D_100M = 2400 * 3600 # 100d
45
72
 
46
- LOAD_TIMEOUT_1536D_500K = 24 * 3600 # 24h
47
- LOAD_TIMEOUT_1536D_5M = 240 * 3600 # 10d
73
+ LOAD_TIMEOUT_1536D_500K = 24 * 3600 # 24h
74
+ LOAD_TIMEOUT_1536D_5M = 240 * 3600 # 10d
48
75
 
49
- OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600 # 24h
50
- OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600 # 24h
51
- OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600 # 10d
52
- OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600 # 100d
76
+ OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600 # 24h
77
+ OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600 # 24h
78
+ OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600 # 10d
79
+ OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600 # 100d
53
80
 
81
+ OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600 # 24h
82
+ OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600 # 10d
54
83
 
55
- OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600 # 24h
56
- OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600 # 10d
57
-
58
84
  def display(self) -> str:
59
- tmp = [
60
- i for i in inspect.getmembers(self)
61
- if not inspect.ismethod(i[1])
62
- and not i[0].startswith('_')
63
- and "TIMEOUT" not in i[0]
85
+ return [
86
+ i
87
+ for i in inspect.getmembers(self)
88
+ if not inspect.ismethod(i[1]) and not i[0].startswith("_") and "TIMEOUT" not in i[0]
64
89
  ]
65
- return tmp
90
+
66
91
 
67
92
  log_util.init(config.LOG_LEVEL)
@@ -1,7 +1,8 @@
1
- import traceback
2
1
  import logging
2
+ import pathlib
3
3
  import subprocess
4
- import os
4
+ import traceback
5
+
5
6
  from . import config
6
7
 
7
8
  log = logging.getLogger("vectordb_bench")
@@ -16,7 +17,7 @@ def run_streamlit():
16
17
  cmd = [
17
18
  "streamlit",
18
19
  "run",
19
- f"{os.path.dirname(__file__)}/frontend/vdb_benchmark.py",
20
+ f"{pathlib.Path(__file__).parent}/frontend/vdb_benchmark.py",
20
21
  "--logger.level",
21
22
  "info",
22
23
  "--theme.base",
@@ -1,24 +1,25 @@
1
- from .cases import CaseLabel
2
- from .task_runner import CaseRunner, RunningStatus, TaskRunner
3
- from ..models import TaskConfig
4
- from ..backend.clients import EmptyDBCaseConfig
5
- from ..backend.data_source import DatasetSource
6
1
  import logging
7
2
 
3
+ from vectordb_bench.backend.clients import EmptyDBCaseConfig
4
+ from vectordb_bench.backend.data_source import DatasetSource
5
+ from vectordb_bench.models import TaskConfig
6
+
7
+ from .cases import CaseLabel
8
+ from .task_runner import CaseRunner, RunningStatus, TaskRunner
8
9
 
9
10
  log = logging.getLogger(__name__)
10
11
 
11
12
 
12
13
  class Assembler:
13
14
  @classmethod
14
- def assemble(cls, run_id , task: TaskConfig, source: DatasetSource) -> CaseRunner:
15
+ def assemble(cls, run_id: str, task: TaskConfig, source: DatasetSource) -> CaseRunner:
15
16
  c_cls = task.case_config.case_id.case_cls
16
17
 
17
18
  c = c_cls(task.case_config.custom_case)
18
- if type(task.db_case_config) != EmptyDBCaseConfig:
19
+ if type(task.db_case_config) is not EmptyDBCaseConfig:
19
20
  task.db_case_config.metric_type = c.dataset.data.metric_type
20
21
 
21
- runner = CaseRunner(
22
+ return CaseRunner(
22
23
  run_id=run_id,
23
24
  config=task,
24
25
  ca=c,
@@ -26,8 +27,6 @@ class Assembler:
26
27
  dataset_source=source,
27
28
  )
28
29
 
29
- return runner
30
-
31
30
  @classmethod
32
31
  def assemble_all(
33
32
  cls,
@@ -50,12 +49,12 @@ class Assembler:
50
49
  db2runner[db].append(r)
51
50
 
52
51
  # check dbclient installed
53
- for k in db2runner.keys():
52
+ for k in db2runner:
54
53
  _ = k.init_cls
55
54
 
56
55
  # sort by dataset size
57
- for k in db2runner.keys():
58
- db2runner[k].sort(key=lambda x:x.ca.dataset.data.size)
56
+ for k, _ in db2runner:
57
+ db2runner[k].sort(key=lambda x: x.ca.dataset.data.size)
59
58
 
60
59
  all_runners = []
61
60
  all_runners.extend(load_runners)
@@ -1,7 +1,5 @@
1
- import typing
2
1
  import logging
3
2
  from enum import Enum, auto
4
- from typing import Type
5
3
 
6
4
  from vectordb_bench import config
7
5
  from vectordb_bench.backend.clients.api import MetricType
@@ -12,7 +10,6 @@ from vectordb_bench.frontend.components.custom.getCustomConfig import (
12
10
 
13
11
  from .dataset import CustomDataset, Dataset, DatasetManager
14
12
 
15
-
16
13
  log = logging.getLogger(__name__)
17
14
 
18
15
 
@@ -50,11 +47,10 @@ class CaseType(Enum):
50
47
  Custom = 100
51
48
  PerformanceCustomDataset = 101
52
49
 
53
- def case_cls(self, custom_configs: dict | None = None) -> Type["Case"]:
50
+ def case_cls(self, custom_configs: dict | None = None) -> type["Case"]:
54
51
  if custom_configs is None:
55
52
  return type2case.get(self)()
56
- else:
57
- return type2case.get(self)(**custom_configs)
53
+ return type2case.get(self)(**custom_configs)
58
54
 
59
55
  def case_name(self, custom_configs: dict | None = None) -> str:
60
56
  c = self.case_cls(custom_configs)
@@ -99,10 +95,10 @@ class Case(BaseModel):
99
95
  @property
100
96
  def filters(self) -> dict | None:
101
97
  if self.filter_rate is not None:
102
- ID = round(self.filter_rate * self.dataset.data.size)
98
+ target_id = round(self.filter_rate * self.dataset.data.size)
103
99
  return {
104
- "metadata": f">={ID}",
105
- "id": ID,
100
+ "metadata": f">={target_id}",
101
+ "id": target_id,
106
102
  }
107
103
 
108
104
  return None
@@ -126,8 +122,8 @@ class CapacityDim960(CapacityCase):
126
122
  case_id: CaseType = CaseType.CapacityDim960
127
123
  dataset: DatasetManager = Dataset.GIST.manager(100_000)
128
124
  name: str = "Capacity Test (960 Dim Repeated)"
129
- description: str = """This case tests the vector database's loading capacity by repeatedly inserting large-dimension
130
- vectors (GIST 100K vectors, <b>960 dimensions</b>) until it is fully loaded. Number of inserted vectors will be
125
+ description: str = """This case tests the vector database's loading capacity by repeatedly inserting large-dimension
126
+ vectors (GIST 100K vectors, <b>960 dimensions</b>) until it is fully loaded. Number of inserted vectors will be
131
127
  reported."""
132
128
 
133
129
 
@@ -136,7 +132,7 @@ class CapacityDim128(CapacityCase):
136
132
  dataset: DatasetManager = Dataset.SIFT.manager(500_000)
137
133
  name: str = "Capacity Test (128 Dim Repeated)"
138
134
  description: str = """This case tests the vector database's loading capacity by repeatedly inserting small-dimension
139
- vectors (SIFT 100K vectors, <b>128 dimensions</b>) until it is fully loaded. Number of inserted vectors will be
135
+ vectors (SIFT 100K vectors, <b>128 dimensions</b>) until it is fully loaded. Number of inserted vectors will be
140
136
  reported."""
141
137
 
142
138
 
@@ -144,8 +140,9 @@ class Performance768D10M(PerformanceCase):
144
140
  case_id: CaseType = CaseType.Performance768D10M
145
141
  dataset: DatasetManager = Dataset.COHERE.manager(10_000_000)
146
142
  name: str = "Search Performance Test (10M Dataset, 768 Dim)"
147
- description: str = """This case tests the search performance of a vector database with a large dataset (<b>Cohere 10M vectors</b>, 768 dimensions) at varying parallel levels.
148
- Results will show index building time, recall, and maximum QPS."""
143
+ description: str = """This case tests the search performance of a vector database with a large dataset
144
+ (<b>Cohere 10M vectors</b>, 768 dimensions) at varying parallel levels.
145
+ Results will show index building time, recall, and maximum QPS."""
149
146
  load_timeout: float | int = config.LOAD_TIMEOUT_768D_10M
150
147
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_10M
151
148
 
@@ -154,8 +151,9 @@ class Performance768D1M(PerformanceCase):
154
151
  case_id: CaseType = CaseType.Performance768D1M
155
152
  dataset: DatasetManager = Dataset.COHERE.manager(1_000_000)
156
153
  name: str = "Search Performance Test (1M Dataset, 768 Dim)"
157
- description: str = """This case tests the search performance of a vector database with a medium dataset (<b>Cohere 1M vectors</b>, 768 dimensions) at varying parallel levels.
158
- Results will show index building time, recall, and maximum QPS."""
154
+ description: str = """This case tests the search performance of a vector database with a medium dataset
155
+ (<b>Cohere 1M vectors</b>, 768 dimensions) at varying parallel levels.
156
+ Results will show index building time, recall, and maximum QPS."""
159
157
  load_timeout: float | int = config.LOAD_TIMEOUT_768D_1M
160
158
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_1M
161
159
 
@@ -165,8 +163,9 @@ class Performance768D10M1P(PerformanceCase):
165
163
  filter_rate: float | int | None = 0.01
166
164
  dataset: DatasetManager = Dataset.COHERE.manager(10_000_000)
167
165
  name: str = "Filtering Search Performance Test (10M Dataset, 768 Dim, Filter 1%)"
168
- description: str = """This case tests the search performance of a vector database with a large dataset (<b>Cohere 10M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
169
- Results will show index building time, recall, and maximum QPS."""
166
+ description: str = """This case tests the search performance of a vector database with a large dataset
167
+ (<b>Cohere 10M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel
168
+ levels. Results will show index building time, recall, and maximum QPS."""
170
169
  load_timeout: float | int = config.LOAD_TIMEOUT_768D_10M
171
170
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_10M
172
171
 
@@ -176,8 +175,9 @@ class Performance768D1M1P(PerformanceCase):
176
175
  filter_rate: float | int | None = 0.01
177
176
  dataset: DatasetManager = Dataset.COHERE.manager(1_000_000)
178
177
  name: str = "Filtering Search Performance Test (1M Dataset, 768 Dim, Filter 1%)"
179
- description: str = """This case tests the search performance of a vector database with a medium dataset (<b>Cohere 1M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
180
- Results will show index building time, recall, and maximum QPS."""
178
+ description: str = """This case tests the search performance of a vector database with a medium dataset
179
+ (<b>Cohere 1M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>),
180
+ at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
181
181
  load_timeout: float | int = config.LOAD_TIMEOUT_768D_1M
182
182
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_1M
183
183
 
@@ -187,8 +187,9 @@ class Performance768D10M99P(PerformanceCase):
187
187
  filter_rate: float | int | None = 0.99
188
188
  dataset: DatasetManager = Dataset.COHERE.manager(10_000_000)
189
189
  name: str = "Filtering Search Performance Test (10M Dataset, 768 Dim, Filter 99%)"
190
- description: str = """This case tests the search performance of a vector database with a large dataset (<b>Cohere 10M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
191
- Results will show index building time, recall, and maximum QPS."""
190
+ description: str = """This case tests the search performance of a vector database with a large dataset
191
+ (<b>Cohere 10M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>),
192
+ at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
192
193
  load_timeout: float | int = config.LOAD_TIMEOUT_768D_10M
193
194
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_10M
194
195
 
@@ -198,8 +199,9 @@ class Performance768D1M99P(PerformanceCase):
198
199
  filter_rate: float | int | None = 0.99
199
200
  dataset: DatasetManager = Dataset.COHERE.manager(1_000_000)
200
201
  name: str = "Filtering Search Performance Test (1M Dataset, 768 Dim, Filter 99%)"
201
- description: str = """This case tests the search performance of a vector database with a medium dataset (<b>Cohere 1M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
202
- Results will show index building time, recall, and maximum QPS."""
202
+ description: str = """This case tests the search performance of a vector database with a medium dataset
203
+ (<b>Cohere 1M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>),
204
+ at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
203
205
  load_timeout: float | int = config.LOAD_TIMEOUT_768D_1M
204
206
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_1M
205
207
 
@@ -209,8 +211,9 @@ class Performance768D100M(PerformanceCase):
209
211
  filter_rate: float | int | None = None
210
212
  dataset: DatasetManager = Dataset.LAION.manager(100_000_000)
211
213
  name: str = "Search Performance Test (100M Dataset, 768 Dim)"
212
- description: str = """This case tests the search performance of a vector database with a large 100M dataset (<b>LAION 100M vectors</b>, 768 dimensions), at varying parallel levels.
213
- Results will show index building time, recall, and maximum QPS."""
214
+ description: str = """This case tests the search performance of a vector database with a large 100M dataset
215
+ (<b>LAION 100M vectors</b>, 768 dimensions), at varying parallel levels. Results will show index building time,
216
+ recall, and maximum QPS."""
214
217
  load_timeout: float | int = config.LOAD_TIMEOUT_768D_100M
215
218
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_100M
216
219
 
@@ -220,8 +223,9 @@ class Performance1536D500K(PerformanceCase):
220
223
  filter_rate: float | int | None = None
221
224
  dataset: DatasetManager = Dataset.OPENAI.manager(500_000)
222
225
  name: str = "Search Performance Test (500K Dataset, 1536 Dim)"
223
- description: str = """This case tests the search performance of a vector database with a medium 500K dataset (<b>OpenAI 500K vectors</b>, 1536 dimensions), at varying parallel levels.
224
- Results will show index building time, recall, and maximum QPS."""
226
+ description: str = """This case tests the search performance of a vector database with a medium 500K dataset
227
+ (<b>OpenAI 500K vectors</b>, 1536 dimensions), at varying parallel levels. Results will show index building time,
228
+ recall, and maximum QPS."""
225
229
  load_timeout: float | int = config.LOAD_TIMEOUT_1536D_500K
226
230
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_500K
227
231
 
@@ -231,8 +235,9 @@ class Performance1536D5M(PerformanceCase):
231
235
  filter_rate: float | int | None = None
232
236
  dataset: DatasetManager = Dataset.OPENAI.manager(5_000_000)
233
237
  name: str = "Search Performance Test (5M Dataset, 1536 Dim)"
234
- description: str = """This case tests the search performance of a vector database with a medium 5M dataset (<b>OpenAI 5M vectors</b>, 1536 dimensions), at varying parallel levels.
235
- Results will show index building time, recall, and maximum QPS."""
238
+ description: str = """This case tests the search performance of a vector database with a medium 5M dataset
239
+ (<b>OpenAI 5M vectors</b>, 1536 dimensions), at varying parallel levels. Results will show index building time,
240
+ recall, and maximum QPS."""
236
241
  load_timeout: float | int = config.LOAD_TIMEOUT_1536D_5M
237
242
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_5M
238
243
 
@@ -242,8 +247,9 @@ class Performance1536D500K1P(PerformanceCase):
242
247
  filter_rate: float | int | None = 0.01
243
248
  dataset: DatasetManager = Dataset.OPENAI.manager(500_000)
244
249
  name: str = "Filtering Search Performance Test (500K Dataset, 1536 Dim, Filter 1%)"
245
- description: str = """This case tests the search performance of a vector database with a large dataset (<b>OpenAI 500K vectors</b>, 1536 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
246
- Results will show index building time, recall, and maximum QPS."""
250
+ description: str = """This case tests the search performance of a vector database with a large dataset
251
+ (<b>OpenAI 500K vectors</b>, 1536 dimensions) under a low filtering rate (<b>1% vectors</b>),
252
+ at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
247
253
  load_timeout: float | int = config.LOAD_TIMEOUT_1536D_500K
248
254
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_500K
249
255
 
@@ -253,8 +259,9 @@ class Performance1536D5M1P(PerformanceCase):
253
259
  filter_rate: float | int | None = 0.01
254
260
  dataset: DatasetManager = Dataset.OPENAI.manager(5_000_000)
255
261
  name: str = "Filtering Search Performance Test (5M Dataset, 1536 Dim, Filter 1%)"
256
- description: str = """This case tests the search performance of a vector database with a large dataset (<b>OpenAI 5M vectors</b>, 1536 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
257
- Results will show index building time, recall, and maximum QPS."""
262
+ description: str = """This case tests the search performance of a vector database with a large dataset
263
+ (<b>OpenAI 5M vectors</b>, 1536 dimensions) under a low filtering rate (<b>1% vectors</b>),
264
+ at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
258
265
  load_timeout: float | int = config.LOAD_TIMEOUT_1536D_5M
259
266
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_5M
260
267
 
@@ -264,8 +271,9 @@ class Performance1536D500K99P(PerformanceCase):
264
271
  filter_rate: float | int | None = 0.99
265
272
  dataset: DatasetManager = Dataset.OPENAI.manager(500_000)
266
273
  name: str = "Filtering Search Performance Test (500K Dataset, 1536 Dim, Filter 99%)"
267
- description: str = """This case tests the search performance of a vector database with a medium dataset (<b>OpenAI 500K vectors</b>, 1536 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
268
- Results will show index building time, recall, and maximum QPS."""
274
+ description: str = """This case tests the search performance of a vector database with a medium dataset
275
+ (<b>OpenAI 500K vectors</b>, 1536 dimensions) under a high filtering rate (<b>99% vectors</b>),
276
+ at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
269
277
  load_timeout: float | int = config.LOAD_TIMEOUT_1536D_500K
270
278
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_500K
271
279
 
@@ -275,8 +283,9 @@ class Performance1536D5M99P(PerformanceCase):
275
283
  filter_rate: float | int | None = 0.99
276
284
  dataset: DatasetManager = Dataset.OPENAI.manager(5_000_000)
277
285
  name: str = "Filtering Search Performance Test (5M Dataset, 1536 Dim, Filter 99%)"
278
- description: str = """This case tests the search performance of a vector database with a medium dataset (<b>OpenAI 5M vectors</b>, 1536 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
279
- Results will show index building time, recall, and maximum QPS."""
286
+ description: str = """This case tests the search performance of a vector database with a medium dataset
287
+ (<b>OpenAI 5M vectors</b>, 1536 dimensions) under a high filtering rate (<b>99% vectors</b>),
288
+ at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
280
289
  load_timeout: float | int = config.LOAD_TIMEOUT_1536D_5M
281
290
  optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_5M
282
291
 
@@ -286,10 +295,11 @@ class Performance1536D50K(PerformanceCase):
286
295
  filter_rate: float | int | None = None
287
296
  dataset: DatasetManager = Dataset.OPENAI.manager(50_000)
288
297
  name: str = "Search Performance Test (50K Dataset, 1536 Dim)"
289
- description: str = """This case tests the search performance of a vector database with a medium 50K dataset (<b>OpenAI 50K vectors</b>, 1536 dimensions), at varying parallel levels.
290
- Results will show index building time, recall, and maximum QPS."""
298
+ description: str = """This case tests the search performance of a vector database with a medium 50K dataset
299
+ (<b>OpenAI 50K vectors</b>, 1536 dimensions), at varying parallel levels. Results will show index building time,
300
+ recall, and maximum QPS."""
291
301
  load_timeout: float | int = 3600
292
- optimize_timeout: float | int | None = 15 * 60
302
+ optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_DEFAULT
293
303
 
294
304
 
295
305
  def metric_type_map(s: str) -> MetricType:
@@ -312,11 +322,11 @@ class PerformanceCustomDataset(PerformanceCase):
312
322
 
313
323
  def __init__(
314
324
  self,
315
- name,
316
- description,
317
- load_timeout,
318
- optimize_timeout,
319
- dataset_config,
325
+ name: str,
326
+ description: str,
327
+ load_timeout: float,
328
+ optimize_timeout: float,
329
+ dataset_config: dict,
320
330
  **kwargs,
321
331
  ):
322
332
  dataset_config = CustomDatasetConfig(**dataset_config)