vectordb-bench 0.0.18__py3-none-any.whl → 0.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. vectordb_bench/backend/cases.py +1 -1
  2. vectordb_bench/backend/clients/__init__.py +26 -0
  3. vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +27 -0
  4. vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +19 -0
  5. vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +304 -0
  6. vectordb_bench/backend/clients/aliyun_opensearch/config.py +48 -0
  7. vectordb_bench/backend/clients/alloydb/cli.py +1 -1
  8. vectordb_bench/backend/clients/api.py +3 -0
  9. vectordb_bench/backend/clients/milvus/cli.py +25 -1
  10. vectordb_bench/backend/clients/milvus/config.py +16 -2
  11. vectordb_bench/backend/clients/milvus/milvus.py +5 -7
  12. vectordb_bench/backend/runner/rate_runner.py +32 -15
  13. vectordb_bench/backend/runner/read_write_runner.py +102 -36
  14. vectordb_bench/backend/runner/serial_runner.py +8 -2
  15. vectordb_bench/backend/runner/util.py +0 -16
  16. vectordb_bench/backend/task_runner.py +4 -3
  17. vectordb_bench/backend/utils.py +1 -0
  18. vectordb_bench/frontend/config/dbCaseConfigs.py +58 -0
  19. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.19.dist-info}/METADATA +13 -28
  20. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.19.dist-info}/RECORD +24 -20
  21. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.19.dist-info}/LICENSE +0 -0
  22. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.19.dist-info}/WHEEL +0 -0
  23. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.19.dist-info}/entry_points.txt +0 -0
  24. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.19.dist-info}/top_level.txt +0 -0
vectordb_bench/backend/cases.py
@@ -289,7 +289,7 @@ class Performance1536D50K(PerformanceCase):
  description: str = """This case tests the search performance of a vector database with a medium 50K dataset (<b>OpenAI 50K vectors</b>, 1536 dimensions), at varying parallel levels.
  Results will show index building time, recall, and maximum QPS."""
  load_timeout: float | int = 3600
- optimize_timeout: float | int | None = 15 * 60
+ optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_DEFAULT


  def metric_type_map(s: str) -> MetricType:
vectordb_bench/backend/clients/__init__.py
@@ -37,7 +37,9 @@ class DB(Enum):
  MemoryDB = "MemoryDB"
  Chroma = "Chroma"
  AWSOpenSearch = "OpenSearch"
+ AliyunElasticsearch = "AliyunElasticsearch"
  Test = "test"
+ AliyunOpenSearch = "AliyunOpenSearch"


  @property
@@ -103,6 +105,14 @@ class DB(Enum):
  from .alloydb.alloydb import AlloyDB
  return AlloyDB

+ if self == DB.AliyunElasticsearch:
+ from .aliyun_elasticsearch.aliyun_elasticsearch import AliyunElasticsearch
+ return AliyunElasticsearch
+
+ if self == DB.AliyunOpenSearch:
+ from .aliyun_opensearch.aliyun_opensearch import AliyunOpenSearch
+ return AliyunOpenSearch
+
  @property
  def config_cls(self) -> Type[DBConfig]:
  """Import while in use"""
@@ -166,6 +176,14 @@ class DB(Enum):
  from .alloydb.config import AlloyDBConfig
  return AlloyDBConfig

+ if self == DB.AliyunElasticsearch:
+ from .aliyun_elasticsearch.config import AliyunElasticsearchConfig
+ return AliyunElasticsearchConfig
+
+ if self == DB.AliyunOpenSearch:
+ from .aliyun_opensearch.config import AliyunOpenSearchConfig
+ return AliyunOpenSearchConfig
+
  def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseConfig]:
  if self == DB.Milvus:
  from .milvus.config import _milvus_case_config
@@ -211,6 +229,14 @@ class DB(Enum):
  from .alloydb.config import _alloydb_case_config
  return _alloydb_case_config.get(index_type)

+ if self == DB.AliyunElasticsearch:
+ from .elastic_cloud.config import ElasticCloudIndexConfig
+ return ElasticCloudIndexConfig
+
+ if self == DB.AliyunOpenSearch:
+ from .aliyun_opensearch.config import AliyunOpenSearchIndexConfig
+ return AliyunOpenSearchIndexConfig
+
  # DB.Pinecone, DB.Chroma, DB.Redis
  return EmptyDBCaseConfig

vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py
@@ -0,0 +1,27 @@
+ from ..elastic_cloud.elastic_cloud import ElasticCloud
+ from ..elastic_cloud.config import ElasticCloudIndexConfig
+
+
+ class AliyunElasticsearch(ElasticCloud):
+ def __init__(
+ self,
+ dim: int,
+ db_config: dict,
+ db_case_config: ElasticCloudIndexConfig,
+ indice: str = "vdb_bench_indice", # must be lowercase
+ id_col_name: str = "id",
+ vector_col_name: str = "vector",
+ drop_old: bool = False,
+ **kwargs,
+ ):
+ super().__init__(
+ dim=dim,
+ db_config=db_config,
+ db_case_config=db_case_config,
+ indice=indice,
+ id_col_name=id_col_name,
+ vector_col_name=vector_col_name,
+ drop_old=drop_old,
+ **kwargs,
+ )
+
vectordb_bench/backend/clients/aliyun_elasticsearch/config.py
@@ -0,0 +1,19 @@
+ from enum import Enum
+ from pydantic import SecretStr, BaseModel
+
+ from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
+
+
+ class AliyunElasticsearchConfig(DBConfig, BaseModel):
+ #: Protocol in use to connect to the node
+ scheme: str = "http"
+ host: str = ""
+ port: int = 9200
+ user: str = "elastic"
+ password: SecretStr
+
+ def to_dict(self) -> dict:
+ return {
+ "hosts": [{'scheme': self.scheme, 'host': self.host, 'port': self.port}],
+ "basic_auth": (self.user, self.password.get_secret_value()),
+ }
vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py
@@ -0,0 +1,304 @@
+ import json
+ import logging
+ from contextlib import contextmanager
+ import time
+
+ from alibabacloud_ha3engine_vector.models import QueryRequest
+
+ from ..api import VectorDB, MetricType
+ from .config import AliyunOpenSearchIndexConfig
+
+ from alibabacloud_searchengine20211025.client import Client as searchengineClient
+ from alibabacloud_searchengine20211025 import models as searchengine_models
+ from alibabacloud_tea_openapi import models as open_api_models
+ from alibabacloud_ha3engine_vector import models, client
+
+ log = logging.getLogger(__name__)
+
+ ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024 # 2MB
+ ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH = 100
+
+ class AliyunOpenSearch(VectorDB):
+ def __init__(
+ self,
+ dim: int,
+ db_config: dict,
+ db_case_config: AliyunOpenSearchIndexConfig,
+ collection_name: str = "VectorDBBenchCollection",
+ drop_old: bool = False,
+ **kwargs,
+ ):
+ self.control_client = None
+ self.dim = dim
+ self.db_config = db_config
+ self.case_config = db_case_config
+ self.collection_name = collection_name
+ self.instance_id = db_config["host"].split(".")[0].replace("http://", "").replace("https://", "")
+
+ self._primary_field = "id"
+ self._scalar_field = "int_id"
+ self._vector_field = "vector"
+ self._index_name = "vector_idx"
+
+ self.batch_size = int(
+ min(ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH / (dim * 25), ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH)
+ )
+
+ log.info(f"Aliyun_OpenSearch client config: {self.db_config}")
+ control_config = open_api_models.Config(
+ access_key_id=self.db_config["ak"],
+ access_key_secret=self.db_config["sk"],
+ endpoint=self.db_config["control_host"]
+ )
+ self.control_client = searchengineClient(control_config)
+
+ if drop_old:
+ log.info(f"aliyun_OpenSearch client drop old index: {self.collection_name}")
+ if self._index_exists(self.control_client):
+ self._modify_index(self.control_client)
+ else:
+ self._create_index(self.control_client)
+
+ def _create_index(self, client: searchengineClient):
+ create_table_request = searchengine_models.CreateTableRequest()
+ create_table_request.name = self.collection_name
+ create_table_request.primary_key = self._primary_field
+ create_table_request.partition_count = 1
+ create_table_request.field_schema = {
+ self._primary_field: "INT64",
+ self._vector_field: "MULTI_FLOAT",
+ self._scalar_field: "INT64"
+ }
+ vector_index = searchengine_models.ModifyTableRequestVectorIndex()
+ vector_index.index_name = self._index_name
+ vector_index.dimension = self.dim
+ vector_index.distance_type = self.case_config.distance_type()
+ vector_index.vector_field = self._vector_field
+ vector_index.vector_index_type = "HNSW"
+
+ advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
+ advance_params.build_index_params = "{\"proxima.hnsw.builder.max_neighbor_count\":" + str(self.case_config.M) + ",\"proxima.hnsw.builder.efconstruction\":" + str(self.case_config.efConstruction) + ",\"proxima.hnsw.builder.enable_adsampling\":true,\"proxima.hnsw.builder.slack_pruning_factor\":1.1,\"proxima.hnsw.builder.thread_count\":16}"
+ advance_params.search_index_params = "{\"proxima.hnsw.searcher.ef\":400,\"proxima.hnsw.searcher.dynamic_termination.prob_threshold\":0.7}"
+ vector_index.advance_params = advance_params
+ create_table_request.vector_index = [vector_index]
+
+ try:
+ response = client.create_table(self.instance_id, create_table_request)
+ log.info(f"create table success: {response.body}")
+ except Exception as error:
+ log.info(error.message)
+ log.info(error.data.get("Recommend"))
+ log.info(f"Failed to create index: error: {str(error)}")
+ raise error from None
+
+ # check if index create success
+ self._active_index(client)
+
+ # check if index create success
+ def _active_index(self, client: searchengineClient) -> None:
+ retry_times = 0
+ while True:
+ time.sleep(10)
+ log.info(f"begin to {retry_times} times get table")
+ retry_times += 1
+ response = client.get_table(self.instance_id, self.collection_name)
+ if response.body.result.status == 'IN_USE':
+ log.info(f"{self.collection_name} table begin to use.")
+ return
+
+ def _index_exists(self, client: searchengineClient) -> bool:
+ try:
+ client.get_table(self.instance_id, self.collection_name)
+ return True
+ except Exception as error:
+ log.info(f'get table from searchengine error')
+ log.info(error.message)
+ return False
+
+ # check if index build success, Insert the embeddings to the vector database after index build success
+ def _index_build_success(self, client: searchengineClient) -> None:
+ log.info(f"begin to check if table build success.")
+ time.sleep(50)
+
+ retry_times = 0
+ while True:
+ time.sleep(10)
+ log.info(f"begin to {retry_times} times get table fsm")
+ retry_times += 1
+ request = searchengine_models.ListTasksRequest()
+ request.start = (int(time.time()) - 3600) * 1000
+ request.end = int(time.time()) * 1000
+ response = client.list_tasks(self.instance_id, request)
+ fsms = response.body.result
+ cur_fsm = None
+ for fsm in fsms:
+ if fsm["type"] != "datasource_flow_fsm":
+ continue
+ if self.collection_name not in fsm["fsmId"]:
+ continue
+ cur_fsm = fsm
+ break
+ if cur_fsm is None:
+ print("no build index fsm")
+ return
+ if "success" == cur_fsm["status"]:
+ return
+
+ def _modify_index(self, client: searchengineClient) -> None:
+ # check if index create success
+ self._active_index(client)
+
+ modify_table_request = searchengine_models.ModifyTableRequest()
+ modify_table_request.partition_count = 1
+ modify_table_request.primary_key = self._primary_field
+ modify_table_request.field_schema = {
+ self._primary_field: "INT64",
+ self._vector_field: "MULTI_FLOAT",
+ self._scalar_field: "INT64"
+ }
+ vector_index = searchengine_models.ModifyTableRequestVectorIndex()
+ vector_index.index_name = self._index_name
+ vector_index.dimension = self.dim
+ vector_index.distance_type = self.case_config.distance_type()
+ vector_index.vector_field = self._vector_field
+ vector_index.vector_index_type = "HNSW"
+ advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
+ advance_params.build_index_params = "{\"proxima.hnsw.builder.max_neighbor_count\":" + str(self.case_config.M) + ",\"proxima.hnsw.builder.efconstruction\":" + str(self.case_config.efConstruction) + ",\"proxima.hnsw.builder.enable_adsampling\":true,\"proxima.hnsw.builder.slack_pruning_factor\":1.1,\"proxima.hnsw.builder.thread_count\":16}"
+ advance_params.search_index_params = "{\"proxima.hnsw.searcher.ef\":400,\"proxima.hnsw.searcher.dynamic_termination.prob_threshold\":0.7}"
+ vector_index.advance_params = advance_params
+
+ modify_table_request.vector_index = [vector_index]
+
+ try:
+ response = client.modify_table(self.instance_id, self.collection_name, modify_table_request)
+ log.info(f"modify table success: {response.body}")
+ except Exception as error:
+ log.info(error.message)
+ log.info(error.data.get("Recommend"))
+ log.info(f"Failed to modify index: error: {str(error)}")
+ raise error from None
+
+ # check if modify index & delete data fsm success
+ self._index_build_success(client)
+
+ # get collection records total count
+ def _get_total_count(self):
+ try:
+ response = self.client.stats(self.collection_name)
+ body = json.loads(response.body)
+ log.info(f"stats info: {response.body}")
+
+ if "result" in body and "totalDocCount" in body.get("result"):
+ return body.get("result").get("totalDocCount")
+ else:
+ return 0
+ except Exception as e:
+ print(f"Error querying index: {e}")
+ return 0
+
+ @contextmanager
+ def init(self) -> None:
+ """connect to aliyun opensearch"""
+ config = models.Config(
+ endpoint=self.db_config["host"],
+ protocol="http",
+ access_user_name=self.db_config["user"],
+ access_pass_word=self.db_config["password"]
+ )
+
+ self.client = client.Client(config)
+
+ yield
+ # self.client.transport.close()
+ self.client = None
+ del self.client
+
+ def insert_embeddings(
+ self,
+ embeddings: list[list[float]],
+ metadata: list[int],
+ **kwargs,
+ ) -> tuple[int, Exception]:
+ """Insert the embeddings to the opensearch."""
+ assert self.client is not None, "should self.init() first"
+ assert len(embeddings) == len(metadata)
+ insert_count = 0
+
+ try:
+ for batch_start_offset in range(0, len(embeddings), self.batch_size):
+ batch_end_offset = min(
+ batch_start_offset + self.batch_size, len(embeddings)
+ )
+ documents = []
+ for i in range(batch_start_offset, batch_end_offset):
+ documentFields = {
+ self._primary_field: metadata[i],
+ self._vector_field: embeddings[i],
+ self._scalar_field: metadata[i],
+ "ops_build_channel": "inc"
+ }
+ document = {
+ "fields": documentFields,
+ "cmd": "add"
+ }
+ documents.append(document)
+
+ pushDocumentsRequest = models.PushDocumentsRequest({}, documents)
+ self.client.push_documents(self.collection_name, self._primary_field, pushDocumentsRequest)
+ insert_count += batch_end_offset - batch_start_offset
+ except Exception as e:
+ log.info(f"Failed to insert data: {e}")
+ return (insert_count, e)
+ return (insert_count, None)
+
+ def search_embedding(
+ self,
+ query: list[float],
+ k: int = 100,
+ filters: dict | None = None,
+ ) -> list[int]:
+ assert self.client is not None, "should self.init() first"
+ search_params = "{\"proxima.hnsw.searcher.ef\":"+ str(self.case_config.ef_search) +"}"
+
+ os_filter = f"{self._scalar_field} {filters.get('metadata')}" if filters else ""
+
+ try:
+ request = QueryRequest(table_name=self.collection_name,
+ vector=query,
+ top_k=k,
+ search_params=search_params, filter=os_filter)
+ result = self.client.query(request)
+ except Exception as e:
+ log.info(f"Error querying index: {e}")
+ raise e
+ res = json.loads(result.body)
+ id_res = [one_res["id"] for one_res in res["result"]]
+ return id_res
+
+ def need_normalize_cosine(self) -> bool:
+ """Wheather this database need to normalize dataset to support COSINE"""
+ if self.case_config.metric_type == MetricType.COSINE:
+ log.info(f"cosine dataset need normalize.")
+ return True
+
+ return False
+
+ def optimize(self):
+ pass
+
+ def optimize_with_size(self, data_size: int):
+ log.info(f"optimize count: {data_size}")
+ retry_times = 0
+ while True:
+ time.sleep(10)
+ log.info(f"begin to {retry_times} times get optimize table")
+ retry_times += 1
+ total_count = self._get_total_count()
+ # check if the data is inserted
+ if total_count == data_size:
+ log.info(f"optimize table finish.")
+ return
+
+ def ready_to_load(self):
+ """ready_to_load will be called before load in load cases."""
+ pass
vectordb_bench/backend/clients/aliyun_opensearch/config.py
@@ -0,0 +1,48 @@
+ import logging
+ from enum import Enum
+ from pydantic import SecretStr, BaseModel
+
+ from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
+
+ log = logging.getLogger(__name__)
+
+
+ class AliyunOpenSearchConfig(DBConfig, BaseModel):
+ host: str = ""
+ user: str = ""
+ password: SecretStr = ""
+
+ ak: str = ""
+ sk: SecretStr = ""
+ control_host: str = "searchengine.cn-hangzhou.aliyuncs.com"
+
+ def to_dict(self) -> dict:
+ return {
+ "host": self.host,
+ "user": self.user,
+ "password": self.password.get_secret_value(),
+ "ak": self.ak,
+ "sk": self.sk.get_secret_value(),
+ "control_host": self.control_host,
+ }
+
+ class AliyunOpenSearchIndexConfig(BaseModel, DBCaseConfig):
+ metric_type: MetricType = MetricType.L2
+ efConstruction: int = 500
+ M: int = 100
+ ef_search: int = 40
+
+ def distance_type(self) -> str:
+ if self.metric_type == MetricType.L2:
+ return "SquaredEuclidean"
+ elif self.metric_type == MetricType.IP:
+ return "InnerProduct"
+ elif self.metric_type == MetricType.COSINE:
+ return "InnerProduct"
+ return "SquaredEuclidean"
+
+ def index_param(self) -> dict:
+ return {}
+
+ def search_param(self) -> dict:
+ return {}
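For orientation, the connection settings the new client expects are the dict produced by `AliyunOpenSearchConfig.to_dict()` above; a minimal sketch with placeholder values (the endpoint and credentials below are invented for illustration):

```python
# Shape mirrors AliyunOpenSearchConfig.to_dict() above; all values are placeholders.
db_config = {
    "host": "http://ha-cn-example.public.ha.aliyuncs.com",     # data-plane endpoint (hypothetical)
    "user": "benchmark-user",
    "password": "********",
    "ak": "<access-key-id>",
    "sk": "<access-key-secret>",
    "control_host": "searchengine.cn-hangzhou.aliyuncs.com",   # control-plane endpoint (default above)
}

# The client derives the instance id from the host, exactly as aliyun_opensearch.py does above.
instance_id = db_config["host"].split(".")[0].replace("http://", "").replace("https://", "")
print(instance_id)  # -> "ha-cn-example"
```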
vectordb_bench/backend/clients/alloydb/cli.py
@@ -106,7 +106,7 @@ class AlloyDBScaNNTypedDict(AlloyDBTypedDict):
  int,
  click.option(
  "--max-num-levels",
- type=click.Choice([1, 2]),
+ type=click.Choice(["1", "2"]),
  help="Maximum number of levels",
  default=1
  )
vectordb_bench/backend/clients/api.py
@@ -204,6 +204,9 @@ class VectorDB(ABC):
  """
  raise NotImplementedError

+ def optimize_with_size(self, data_size: int):
+ self.optimize()
+
  # TODO: remove
  @abstractmethod
  def ready_to_load(self):
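The new `optimize_with_size` hook defaults to `optimize()`, so existing clients are unaffected; a client that needs the inserted row count can override it, as the AliyunOpenSearch client above does. A minimal, self-contained sketch of that pattern (the class and `_row_count` helper here are hypothetical, not part of this release):

```python
import time


class SketchClient:
    """Hypothetical client illustrating the new optimize_with_size(data_size) hook."""

    def _row_count(self) -> int:
        # Placeholder: a real client would ask the engine for its current document count.
        return 50_000

    def optimize(self) -> None:
        print("build/refresh index")

    def optimize_with_size(self, data_size: int) -> None:
        # Poll until the engine reports all inserted rows, then optimize,
        # mirroring AliyunOpenSearch.optimize_with_size above.
        while self._row_count() < data_size:
            time.sleep(10)
        self.optimize()


SketchClient().optimize_with_size(50_000)
```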
vectordb_bench/backend/clients/milvus/cli.py
@@ -1,4 +1,4 @@
- from typing import Annotated, TypedDict, Unpack
+ from typing import Annotated, TypedDict, Unpack, Optional

  import click
  from pydantic import SecretStr
@@ -21,6 +21,12 @@ class MilvusTypedDict(TypedDict):
  uri: Annotated[
  str, click.option("--uri", type=str, help="uri connection string", required=True)
  ]
+ user_name: Annotated[
+ Optional[str], click.option("--user-name", type=str, help="Db username", required=False)
+ ]
+ password: Annotated[
+ Optional[str], click.option("--password", type=str, help="Db password", required=False)
+ ]


  class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict):
@@ -37,6 +43,8 @@ def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
  db_config=MilvusConfig(
  db_label=parameters["db_label"],
  uri=SecretStr(parameters["uri"]),
+ user=parameters["user_name"],
+ password=SecretStr(parameters["password"]),
  ),
  db_case_config=AutoIndexConfig(),
  **parameters,
@@ -53,6 +61,8 @@ def MilvusFlat(**parameters: Unpack[MilvusAutoIndexTypedDict]):
  db_config=MilvusConfig(
  db_label=parameters["db_label"],
  uri=SecretStr(parameters["uri"]),
+ user=parameters["user_name"],
+ password=SecretStr(parameters["password"]),
  ),
  db_case_config=FLATConfig(),
  **parameters,
@@ -73,6 +83,8 @@ def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
  db_config=MilvusConfig(
  db_label=parameters["db_label"],
  uri=SecretStr(parameters["uri"]),
+ user=parameters["user_name"],
+ password=SecretStr(parameters["password"]) if parameters["password"] else None,
  ),
  db_case_config=HNSWConfig(
  M=parameters["m"],
@@ -97,6 +109,8 @@ def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
  db_config=MilvusConfig(
  db_label=parameters["db_label"],
  uri=SecretStr(parameters["uri"]),
+ user=parameters["user_name"],
+ password=SecretStr(parameters["password"]),
  ),
  db_case_config=IVFFlatConfig(
  nlist=parameters["nlist"],
@@ -116,6 +130,8 @@ def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
  db_config=MilvusConfig(
  db_label=parameters["db_label"],
  uri=SecretStr(parameters["uri"]),
+ user=parameters["user_name"],
+ password=SecretStr(parameters["password"]),
  ),
  db_case_config=IVFSQ8Config(
  nlist=parameters["nlist"],
@@ -143,6 +159,8 @@ def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
  db_config=MilvusConfig(
  db_label=parameters["db_label"],
  uri=SecretStr(parameters["uri"]),
+ user=parameters["user_name"],
+ password=SecretStr(parameters["password"]),
  ),
  db_case_config=DISKANNConfig(
  search_list=parameters["search_list"],
@@ -174,6 +192,8 @@ def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
  db_config=MilvusConfig(
  db_label=parameters["db_label"],
  uri=SecretStr(parameters["uri"]),
+ user=parameters["user_name"],
+ password=SecretStr(parameters["password"]),
  ),
  db_case_config=GPUIVFFlatConfig(
  nlist=parameters["nlist"],
@@ -208,6 +228,8 @@ def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
  db_config=MilvusConfig(
  db_label=parameters["db_label"],
  uri=SecretStr(parameters["uri"]),
+ user=parameters["user_name"],
+ password=SecretStr(parameters["password"]),
  ),
  db_case_config=GPUIVFPQConfig(
  nlist=parameters["nlist"],
@@ -274,6 +296,8 @@ def MilvusGPUCAGRA(**parameters: Unpack[MilvusGPUCAGRATypedDict]):
  db_config=MilvusConfig(
  db_label=parameters["db_label"],
  uri=SecretStr(parameters["uri"]),
+ user=parameters["user_name"],
+ password=SecretStr(parameters["password"]),
  ),
  db_case_config=GPUCAGRAConfig(
  intermediate_graph_degree=parameters["intermediate_graph_degree"],
vectordb_bench/backend/clients/milvus/config.py
@@ -1,12 +1,26 @@
- from pydantic import BaseModel, SecretStr
+ from pydantic import BaseModel, SecretStr, validator
  from ..api import DBConfig, DBCaseConfig, MetricType, IndexType


  class MilvusConfig(DBConfig):
  uri: SecretStr = "http://localhost:19530"
+ user: str | None = None
+ password: SecretStr | None = None

  def to_dict(self) -> dict:
- return {"uri": self.uri.get_secret_value()}
+ return {
+ "uri": self.uri.get_secret_value(),
+ "user": self.user if self.user else None,
+ "password": self.password.get_secret_value() if self.password else None,
+ }
+
+ @validator("*")
+ def not_empty_field(cls, v, field):
+ if field.name in cls.common_short_configs() or field.name in cls.common_long_configs() or field.name in ["user", "password"]:
+ return v
+ if isinstance(v, (str, SecretStr)) and len(v) == 0:
+ raise ValueError("Empty string!")
+ return v


  class MilvusIndexConfig(BaseModel):
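With the two new optional fields, Milvus credentials flow from the CLI options above into the connection dict; a hedged usage sketch (assumes the remaining DBConfig fields keep defaults that the relaxed validator accepts):

```python
from pydantic import SecretStr

from vectordb_bench.backend.clients.milvus.config import MilvusConfig

# Anonymous connection: user/password stay None and are skipped by the validator.
anon = MilvusConfig(uri=SecretStr("http://localhost:19530"))

# Authenticated connection, i.e. what the CLI builds from --user-name/--password.
auth = MilvusConfig(
    uri=SecretStr("http://localhost:19530"),
    user="root",                   # placeholder credentials
    password=SecretStr("Milvus"),
)
print(auth.to_dict())  # {"uri": "...", "user": "root", "password": "Milvus"}
```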
vectordb_bench/backend/clients/milvus/milvus.py
@@ -8,7 +8,7 @@ from typing import Iterable
  from pymilvus import Collection, utility
  from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusException

- from ..api import VectorDB, IndexType
+ from ..api import VectorDB
  from .config import MilvusIndexConfig


@@ -66,8 +66,7 @@ class Milvus(VectorDB):
  self.case_config.index_param(),
  index_name=self._index_name,
  )
- if kwargs.get("pre_load") is True:
- self._pre_load(col)
+ col.load()

  connections.disconnect("default")

@@ -90,16 +89,15 @@ class Milvus(VectorDB):
  connections.disconnect("default")

  def _optimize(self):
- self._post_insert()
  log.info(f"{self.name} optimizing before search")
+ self._post_insert()
  try:
- self.col.load()
+ self.col.load(refresh=True)
  except Exception as e:
  log.warning(f"{self.name} optimize error: {e}")
  raise e from None

  def _post_insert(self):
- log.info(f"{self.name} post insert before optimize")
  try:
  self.col.flush()
  # wait for index done and load refresh
@@ -130,7 +128,7 @@ class Milvus(VectorDB):
  log.warning(f"{self.name} compact error: {e}")
  if hasattr(e, 'code'):
  if e.code().name == 'PERMISSION_DENIED':
- log.warning(f"Skip compact due to permission denied.")
+ log.warning("Skip compact due to permission denied.")
  pass
  else:
  raise e
vectordb_bench/backend/runner/rate_runner.py
@@ -1,5 +1,6 @@
  import logging
  import time
+ import concurrent
  from concurrent.futures import ThreadPoolExecutor
  import multiprocessing as mp

@@ -9,7 +10,7 @@ from vectordb_bench.backend.dataset import DataSetIterator
  from vectordb_bench.backend.utils import time_it
  from vectordb_bench import config

- from .util import get_data, is_futures_completed, get_future_exceptions
+ from .util import get_data
  log = logging.getLogger(__name__)


@@ -54,26 +55,42 @@ class RatedMultiThreadingInsertRunner:
  start_time = time.perf_counter()
  finished, elapsed_time = submit_by_rate()
  if finished is True:
- q.put(None, block=True)
+ q.put(True, block=True)
  log.info(f"End of dataset, left unfinished={len(executing_futures)}")
- return
+ break

- q.put(True, block=False)
+ q.put(False, block=False)
  wait_interval = 1 - elapsed_time if elapsed_time < 1 else 0.001

- e, completed = is_futures_completed(executing_futures, wait_interval)
- if completed is True:
- ex = get_future_exceptions(executing_futures)
- if ex is not None:
- log.warn(f"task error, terminating, err={ex}")
- q.put(None)
- executor.shutdown(wait=True, cancel_futures=True)
- raise ex
+ try:
+ done, not_done = concurrent.futures.wait(
+ executing_futures,
+ timeout=wait_interval,
+ return_when=concurrent.futures.FIRST_EXCEPTION)
+
+ if len(not_done) > 0:
+ log.warning(f"Failed to finish all tasks in 1s, [{len(not_done)}/{len(executing_futures)}] tasks are not done, waited={wait_interval:.2f}, trying to wait in the next round")
+ executing_futures = list(not_done)
  else:
  log.debug(f"Finished {len(executing_futures)} insert-{config.NUM_PER_BATCH} task in 1s, wait_interval={wait_interval:.2f}")
- executing_futures = []
- else:
- log.warning(f"Failed to finish tasks in 1s, {e}, waited={wait_interval:.2f}, try to check the next round")
+ executing_futures = []
+ except Exception as e:
+ log.warn(f"task error, terminating, err={e}")
+ q.put(None, block=True)
+ executor.shutdown(wait=True, cancel_futures=True)
+ raise e
+
  dur = time.perf_counter() - start_time
  if dur < 1:
  time.sleep(1 - dur)
+
+ # wait for all tasks in executing_futures to complete
+ if len(executing_futures) > 0:
+ try:
+ done, _ = concurrent.futures.wait(executing_futures,
+ return_when=concurrent.futures.FIRST_EXCEPTION)
+ except Exception as e:
+ log.warn(f"task error, terminating, err={e}")
+ q.put(None, block=True)
+ executor.shutdown(wait=True, cancel_futures=True)
+ raise e
vectordb_bench/backend/runner/read_write_runner.py
@@ -24,7 +24,7 @@ class ReadWriteRunner(MultiProcessingSearchRunner, RatedMultiThreadingInsertRunn
  k: int = 100,
  filters: dict | None = None,
  concurrencies: Iterable[int] = (1, 15, 50),
- search_stage: Iterable[float] = (0.5, 0.6, 0.7, 0.8, 0.9, 1.0), # search in any insert portion, 0.0 means search from the start
+ search_stage: Iterable[float] = (0.5, 0.6, 0.7, 0.8, 0.9), # search from insert portion, 0.0 means search from the start
  read_dur_after_write: int = 300, # seconds, search duration when insertion is done
  timeout: float | None = None,
  ):
@@ -32,7 +32,7 @@ class ReadWriteRunner(MultiProcessingSearchRunner, RatedMultiThreadingInsertRunn
  self.data_volume = dataset.data.size

  for stage in search_stage:
- assert 0.0 <= stage <= 1.0, "each search stage should be in [0.0, 1.0]"
+ assert 0.0 <= stage < 1.0, "each search stage should be in [0.0, 1.0)"
  self.search_stage = sorted(search_stage)
  self.read_dur_after_write = read_dur_after_write

@@ -65,48 +65,114 @@ class ReadWriteRunner(MultiProcessingSearchRunner, RatedMultiThreadingInsertRunn
  k=k,
  )

+ def run_optimize(self):
+ """Optimize needs to run in differenct process for pymilvus schema recursion problem"""
+ with self.db.init():
+ log.info("Search after write - Optimize start")
+ self.db.optimize()
+ log.info("Search after write - Optimize finished")
+
+ def run_search(self):
+ log.info("Search after write - Serial search start")
+ res, ssearch_dur = self.serial_search_runner.run()
+ recall, ndcg, p99_latency = res
+ log.info(f"Search after write - Serial search - recall={recall}, ndcg={ndcg}, p99={p99_latency}, dur={ssearch_dur:.4f}")
+ log.info(f"Search after wirte - Conc search start, dur for each conc={self.read_dur_after_write}")
+ max_qps = self.run_by_dur(self.read_dur_after_write)
+ log.info(f"Search after wirte - Conc search finished, max_qps={max_qps}")
+
+ return (max_qps, recall, ndcg, p99_latency)
+
  def run_read_write(self):
- futures = []
  with mp.Manager() as m:
  q = m.Queue()
  with concurrent.futures.ProcessPoolExecutor(mp_context=mp.get_context("spawn"), max_workers=2) as executor:
- futures.append(executor.submit(self.run_with_rate, q))
- futures.append(executor.submit(self.run_search_by_sig, q))
-
- for future in concurrent.futures.as_completed(futures):
- res = future.result()
- log.info(f"Result = {res}")
-
+ read_write_futures = []
+ read_write_futures.append(executor.submit(self.run_with_rate, q))
+ read_write_futures.append(executor.submit(self.run_search_by_sig, q))
+
+ try:
+ for f in concurrent.futures.as_completed(read_write_futures):
+ res = f.result()
+ log.info(f"Result = {res}")
+
+ # Wait for read_write_futures finishing and do optimize and search
+ op_future = executor.submit(self.run_optimize)
+ op_future.result()
+
+ search_future = executor.submit(self.run_search)
+ last_res = search_future.result()
+
+ log.info(f"Max QPS after optimze and search: {last_res}")
+ except Exception as e:
+ log.warning(f"Read and write error: {e}")
+ executor.shutdown(wait=True, cancel_futures=True)
+ raise e
  log.info("Concurrent read write all done")

-
  def run_search_by_sig(self, q):
- res = []
+ """
+ Args:
+ q: multiprocessing queue
+ (None) means abnormal exit
+ (False) means updating progress
+ (True) means normal exit
+ """
+ result, start_batch = [], 0
  total_batch = math.ceil(self.data_volume / self.insert_rate)
- batch = 0
- recall = 'x'
+ recall, ndcg, p99_latency = None, None, None
+
+ def wait_next_target(start, target_batch) -> bool:
+ """Return False when receive True or None"""
+ while start < target_batch:
+ sig = q.get(block=True)
+
+ if sig is None or sig is True:
+ return False
+ else:
+ start += 1
+ return True

  for idx, stage in enumerate(self.search_stage):
  target_batch = int(total_batch * stage)
- while q.get(block=True):
- batch += 1
- if batch >= target_batch:
- perc = int(stage * 100)
- log.info(f"Insert {perc}% done, total batch={total_batch}")
- log.info(f"[{batch}/{total_batch}] Serial search - {perc}% start")
- recall, ndcg, p99 =self.serial_search_runner.run()
-
- if idx < len(self.search_stage) - 1:
- stage_search_dur = (self.data_volume * (self.search_stage[idx + 1] - stage) // self.insert_rate) // len(self.concurrencies)
- if stage_search_dur < 30:
- log.warning(f"Search duration too short, please reduce concurrency count or insert rate, or increase dataset volume: dur={stage_search_dur}, concurrencies={len(self.concurrencies)}, insert_rate={self.insert_rate}")
- log.info(f"[{batch}/{total_batch}] Conc search - {perc}% start, dur for each conc={stage_search_dur}s")
- else:
- last_search_dur = self.data_volume * (1.0 - stage) // self.insert_rate
- stage_search_dur = last_search_dur + self.read_dur_after_write
- log.info(f"[{batch}/{total_batch}] Last conc search - {perc}% start, [read_until_write|read_after_write|total] =[{last_search_dur}s|{self.read_dur_after_write}s|{stage_search_dur}s]")
-
- max_qps = self.run_by_dur(stage_search_dur)
- res.append((perc, max_qps, recall))
- break
- return res
+ perc = int(stage * 100)
+
+ got = wait_next_target(start_batch, target_batch)
+ if got is False:
+ log.warning(f"Abnormal exit, target_batch={target_batch}, start_batch={start_batch}")
+ return
+
+ log.info(f"Insert {perc}% done, total batch={total_batch}")
+ log.info(f"[{target_batch}/{total_batch}] Serial search - {perc}% start")
+ res, ssearch_dur = self.serial_search_runner.run()
+ recall, ndcg, p99_latency = res
+ log.info(f"[{target_batch}/{total_batch}] Serial search - {perc}% done, recall={recall}, ndcg={ndcg}, p99={p99_latency}, dur={ssearch_dur:.4f}")
+
+ # Search duration for non-last search stage is carefully calculated.
+ # If duration for each concurrency is less than 30s, runner will raise error.
+ if idx < len(self.search_stage) - 1:
+ total_dur_between_stages = self.data_volume * (self.search_stage[idx + 1] - stage) // self.insert_rate
+ csearch_dur = total_dur_between_stages - ssearch_dur
+
+ # Try to leave room for init process executors
+ csearch_dur = csearch_dur - 30 if csearch_dur > 60 else csearch_dur
+
+ each_conc_search_dur = csearch_dur / len(self.concurrencies)
+ if each_conc_search_dur < 30:
+ warning_msg = f"Results might be inaccurate, duration[{csearch_dur:.4f}] left for conc-search is too short, total available dur={total_dur_between_stages}, serial_search_cost={ssearch_dur}."
+ log.warning(warning_msg)
+
+ # The last stage
+ else:
+ each_conc_search_dur = 60
+
+ log.info(f"[{target_batch}/{total_batch}] Concurrent search - {perc}% start, dur={each_conc_search_dur:.4f}")
+ max_qps = self.run_by_dur(each_conc_search_dur)
+ result.append((perc, max_qps, recall, ndcg, p99_latency))
+
+ start_batch = target_batch
+
+ # Drain the queue
+ while q.empty() is False:
+ q.get(block=True)
+ return result
vectordb_bench/backend/runner/serial_runner.py
@@ -167,7 +167,7 @@ class SerialSearchRunner:
  self.test_data = test_data
  self.ground_truth = ground_truth

- def search(self, args: tuple[list, pd.DataFrame]):
+ def search(self, args: tuple[list, pd.DataFrame]) -> tuple[float, float, float]:
  log.info(f"{mp.current_process().name:14} start search the entire test_data to get recall and latency")
  with self.db.init():
  test_data, ground_truth = args
@@ -224,5 +224,11 @@ class SerialSearchRunner:
  result = future.result()
  return result

- def run(self) -> tuple[float, float]:
+ @utils.time_it
+ def run(self) -> tuple[float, float, float]:
+ """
+ Returns:
+ tuple[tuple[float, float, float], float]: (avg_recall, avg_ndcg, p99_latency), cost
+
+ """
  return self._run_in_subprocess()
vectordb_bench/backend/runner/util.py
@@ -1,6 +1,4 @@
  import logging
- import concurrent
- from typing import Iterable

  from pandas import DataFrame
  import numpy as np
@@ -16,17 +14,3 @@ def get_data(data_df: DataFrame, normalize: bool) -> tuple[list[list[float]], li
  else:
  all_embeddings = emb_np.tolist()
  return all_embeddings, all_metadata
-
- def is_futures_completed(futures: Iterable[concurrent.futures.Future], interval) -> (Exception, bool):
- try:
- list(concurrent.futures.as_completed(futures, timeout=interval))
- except TimeoutError as e:
- return e, False
- return None, True
-
-
- def get_future_exceptions(futures: Iterable[concurrent.futures.Future]) -> BaseException | None:
- for f in futures:
- if f.exception() is not None:
- return f.exception()
- return
vectordb_bench/backend/task_runner.py
@@ -206,7 +206,7 @@ class CaseRunner(BaseModel):
  finally:
  runner = None

- def _serial_search(self) -> tuple[float, float]:
+ def _serial_search(self) -> tuple[float, float, float]:
  """Performance serial tests, search the entire test data once,
  calculate the recall, serial_latency_p99

@@ -214,7 +214,8 @@ class CaseRunner(BaseModel):
  tuple[float, float]: recall, serial_latency_p99
  """
  try:
- return self.serial_search_runner.run()
+ results, _ = self.serial_search_runner.run()
+ return results
  except Exception as e:
  log.warning(f"search error: {str(e)}, {e}")
  self.stop()
@@ -238,7 +239,7 @@ class CaseRunner(BaseModel):
  @utils.time_it
  def _task(self) -> None:
  with self.db.init():
- self.db.optimize()
+ self.db.optimize_with_size(data_size=self.ca.dataset.data.size)

  def _optimize(self) -> float:
  with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
vectordb_bench/backend/utils.py
@@ -35,6 +35,7 @@ def numerize(n) -> str:


  def time_it(func):
+ """ returns result and elapsed time"""
  @wraps(func)
  def inner(*args, **kwargs):
  pref = time.perf_counter()
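The docstring added above records the contract that the runner changes in this release rely on: `time_it` returns the wrapped function's result together with its elapsed time. A minimal restatement of that contract (sketch only; the real decorator lives in `vectordb_bench/backend/utils.py`):

```python
import time
from functools import wraps


def time_it(func):
    """Returns (result, elapsed_seconds), matching the docstring added above."""
    @wraps(func)
    def inner(*args, **kwargs):
        pref = time.perf_counter()
        result = func(*args, **kwargs)
        return result, time.perf_counter() - pref
    return inner


@time_it
def run():
    return (0.98, 0.97, 0.012)  # placeholder (recall, ndcg, p99_latency)


(recall, ndcg, p99), cost = run()  # callers such as task_runner now unpack the extra duration
```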
vectordb_bench/frontend/config/dbCaseConfigs.py
@@ -437,6 +437,16 @@ CaseConfigParamInput_EF_SEARCH_AWSOpensearch = CaseConfigInput(
  },
  )

+ CaseConfigParamInput_EF_SEARCH_AliyunOpensearch = CaseConfigInput(
+ label=CaseConfigParamType.ef_search,
+ inputType=InputType.Number,
+ inputConfig={
+ "min": 1,
+ "max": 1000000,
+ "value": 40,
+ },
+ )
+

  CaseConfigParamInput_maintenance_work_mem_PgVector = CaseConfigInput(
  label=CaseConfigParamType.maintenance_work_mem,
@@ -1040,6 +1050,35 @@ CaseConfigParamInput_max_parallel_workers_AlloyDB = CaseConfigInput(
  },
  )

+ CaseConfigParamInput_EFConstruction_AliES = CaseConfigInput(
+ label=CaseConfigParamType.EFConstruction,
+ inputType=InputType.Number,
+ inputConfig={
+ "min": 8,
+ "max": 512,
+ "value": 360,
+ },
+ )
+
+ CaseConfigParamInput_M_AliES = CaseConfigInput(
+ label=CaseConfigParamType.M,
+ inputType=InputType.Number,
+ inputConfig={
+ "min": 4,
+ "max": 64,
+ "value": 30,
+ },
+ )
+ CaseConfigParamInput_NumCandidates_AliES = CaseConfigInput(
+ label=CaseConfigParamType.numCandidates,
+ inputType=InputType.Number,
+ inputConfig={
+ "min": 1,
+ "max": 10000,
+ "value": 100,
+ },
+ )
+

  MilvusLoadConfig = [
  CaseConfigParamInput_IndexType,
@@ -1099,6 +1138,11 @@ AWSOpenSearchPerformanceConfig = [
  CaseConfigParamInput_EF_SEARCH_AWSOpensearch,
  ]

+ AliyunOpensearchLoadingConfig = []
+ AliyunOpenSearchPerformanceConfig = [
+ CaseConfigParamInput_EF_SEARCH_AliyunOpensearch,
+ ]
+
  PgVectorLoadingConfig = [
  CaseConfigParamInput_IndexType_PgVector,
  CaseConfigParamInput_Lists_PgVector,
@@ -1206,6 +1250,12 @@ AlloyDBPerformanceConfig = [
  CaseConfigParamInput_max_parallel_workers_AlloyDB,
  ]

+ AliyunElasticsearchLoadingConfig = [CaseConfigParamInput_EFConstruction_AliES, CaseConfigParamInput_M_AliES]
+ AliyunElasticsearchPerformanceConfig = [
+ CaseConfigParamInput_EFConstruction_AliES,
+ CaseConfigParamInput_M_AliES,
+ CaseConfigParamInput_NumCandidates_AliES,
+ ]

  CASE_CONFIG_MAP = {
  DB.Milvus: {
@@ -1247,4 +1297,12 @@ CASE_CONFIG_MAP = {
  CaseLabel.Load: AlloyDBLoadConfig,
  CaseLabel.Performance: AlloyDBPerformanceConfig,
  },
+ DB.AliyunElasticsearch: {
+ CaseLabel.Load: AliyunElasticsearchLoadingConfig,
+ CaseLabel.Performance: AliyunElasticsearchPerformanceConfig,
+ },
+ DB.AliyunOpenSearch: {
+ CaseLabel.Load: AliyunOpensearchLoadingConfig,
+ CaseLabel.Performance: AliyunOpenSearchPerformanceConfig,
+ },
  }
{vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.19.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vectordb-bench
- Version: 0.0.18
+ Version: 0.0.19
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -35,15 +35,16 @@ Requires-Dist: qdrant-client; extra == "all"
  Requires-Dist: pinecone-client; extra == "all"
  Requires-Dist: weaviate-client; extra == "all"
  Requires-Dist: elasticsearch; extra == "all"
- Requires-Dist: pgvector; extra == "all"
- Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "all"
  Requires-Dist: sqlalchemy; extra == "all"
  Requires-Dist: redis; extra == "all"
  Requires-Dist: chromadb; extra == "all"
+ Requires-Dist: pgvector; extra == "all"
  Requires-Dist: psycopg; extra == "all"
  Requires-Dist: psycopg-binary; extra == "all"
- Requires-Dist: opensearch-dsl==2.1.0; extra == "all"
- Requires-Dist: opensearch-py==2.6.0; extra == "all"
+ Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "all"
+ Requires-Dist: opensearch-dsl; extra == "all"
+ Requires-Dist: opensearch-py; extra == "all"
+ Requires-Dist: memorydb; extra == "all"
  Provides-Extra: qdrant
  Requires-Dist: qdrant-client; extra == "qdrant"
  Provides-Extra: pinecone
@@ -56,18 +57,6 @@ Provides-Extra: pgvector
  Requires-Dist: psycopg; extra == "pgvector"
  Requires-Dist: psycopg-binary; extra == "pgvector"
  Requires-Dist: pgvector; extra == "pgvector"
- Provides-Extra: pgvectorscale
- Requires-Dist: psycopg; extra == "pgvectorscale"
- Requires-Dist: psycopg-binary; extra == "pgvectorscale"
- Requires-Dist: pgvector; extra == "pgvectorscale"
- Provides-Extra: pgdiskann
- Requires-Dist: psycopg; extra == "pgdiskann"
- Requires-Dist: psycopg-binary; extra == "pgdiskann"
- Requires-Dist: pgvector; extra == "pgdiskann"
- Provides-Extra: alloydb
- Requires-Dist: psycopg; extra == "alloydb"
- Requires-Dist: psycopg-binary; extra == "alloydb"
- Requires-Dist: pgvector; extra == "alloydb"
  Provides-Extra: pgvecto-rs
  Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "pgvecto-rs"
  Provides-Extra: redis
@@ -76,9 +65,8 @@ Provides-Extra: memorydb
  Requires-Dist: memorydb; extra == "memorydb"
  Provides-Extra: chromadb
  Requires-Dist: chromadb; extra == "chromadb"
- Provides-Extra: awsopensearch
- Requires-Dist: awsopensearch; extra == "awsopensearch"
- Provides-Extra: zilliz-cloud
+ Provides-Extra: opensearch
+ Requires-Dist: opensearch-py; extra == "opensearch"

  # VectorDBBench: A Benchmark Tool for VectorDB

@@ -111,21 +99,18 @@ All the database client supported

  | Optional database client | install command |
  |--------------------------|---------------------------------------------|
- | pymilvus(*default*) | `pip install vectordb-bench` |
- | all | `pip install vectordb-bench[all]` |
+ | pymilvus, zilliz_cloud (*default*) | `pip install vectordb-bench` |
+ | all (*clients requirements might be conflict with each other*) | `pip install vectordb-bench[all]` |
  | qdrant | `pip install vectordb-bench[qdrant]` |
  | pinecone | `pip install vectordb-bench[pinecone]` |
  | weaviate | `pip install vectordb-bench[weaviate]` |
- | elastic | `pip install vectordb-bench[elastic]` |
- | pgvector | `pip install vectordb-bench[pgvector]` |
+ | elastic, aliyun_elasticsearch| `pip install vectordb-bench[elastic]` |
+ | pgvector, pgvectorscale, pgdiskann, alloydb | `pip install vectordb-bench[pgvector]` |
  | pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
- | pgvectorscale | `pip install vectordb-bench[pgvectorscale]` |
- | pgdiskann | `pip install vectordb-bench[pgdiskann]` |
  | redis | `pip install vectordb-bench[redis]` |
  | memorydb | `pip install vectordb-bench[memorydb]` |
  | chromadb | `pip install vectordb-bench[chromadb]` |
- | awsopensearch | `pip install vectordb-bench[awsopensearch]` |
- | alloydb | `pip install vectordb-bench[alloydb]` |
+ | awsopensearch | `pip install vectordb-bench[opensearch]` |

  ### Run
{vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.19.dist-info}/RECORD
@@ -7,16 +7,20 @@ vectordb_bench/metric.py,sha256=c-LAxCtb55txnsfd3FN4gRpRif8RREhKRF0eg2JmHGc,2045
  vectordb_bench/models.py,sha256=_0_hscKUqaCHjdjyO_-ntPFgJvgU01y8aldqDcq9ELQ,11041
  vectordb_bench/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  vectordb_bench/backend/assembler.py,sha256=mmoLzWXFSlrpWvaVY41wiRNWNv2IR-LzlANX55MJbYI,2028
- vectordb_bench/backend/cases.py,sha256=lQ9jgKaJGunj-mJXR3cgGt16wCsrDrvs-GS3ycTDk0U,16169
+ vectordb_bench/backend/cases.py,sha256=tYAXs-8WhkXVkSfUGd5zh51IxKTojBkCgp94eU6Dbwg,16193
  vectordb_bench/backend/data_source.py,sha256=j4-eD0nIe7Y6fSM5WKEij3GfhyU_YOQ3L5Tyl-1GxX0,5446
  vectordb_bench/backend/dataset.py,sha256=MZSu0Q3AkK9gxiuLKNTMH6hhucKK668j4G1-8emhS18,8786
  vectordb_bench/backend/result_collector.py,sha256=jdQf5-q1z5y07SKy9Sig1wFROmm-p9x_Y81fId0sjaU,807
- vectordb_bench/backend/task_runner.py,sha256=cn_RRDyFfNSLlTT84W-ZaXvdl54pK6Cxcsp9ucNRcCs,11864
- vectordb_bench/backend/utils.py,sha256=2UixYyfKvl8zRiashywB1l6hTI3jMtiZhiVm_bXHV1Y,1811
- vectordb_bench/backend/clients/__init__.py,sha256=JKDRKwfDDZLZFct_wahtSEpqu00HVZBNsVyhJ_hHTL8,6701
- vectordb_bench/backend/clients/api.py,sha256=phvqTME3NEPyZGTo85MPeOWwICZO06W3388GT5g72pc,6210
+ vectordb_bench/backend/task_runner.py,sha256=ZSWlp5JhVpKTpbKQS3vsq3e5ZtOjz_ET-m4X3Euiicg,11949
+ vectordb_bench/backend/utils.py,sha256=8SXcSrw7kmmzeN1cSUwXRSc1BK5dgrj98kALqbrcUNE,1854
+ vectordb_bench/backend/clients/__init__.py,sha256=zr3VRNDm7wy8eREMHE4W1LODCZaXhOvt1qCaKQoXyys,7763
+ vectordb_bench/backend/clients/api.py,sha256=5OCh_HUWQxE8KCXn9wQMc6jv1TEVnsip02-eUYFoNCo,6285
+ vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py,sha256=3bAMrNjhAcWXCSvoW4YT8siY30r4NWqOs9Osv7bgDpo,771
+ vectordb_bench/backend/clients/aliyun_elasticsearch/config.py,sha256=FeglLLFvUrjeCOE3vdB5Sg98fufk7lW1NsOHdLTWFsc,564
+ vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py,sha256=dWEBroRs-qm17vjUZWIrHOHrLswn6m-NzoMmY1pU9M4,12673
+ vectordb_bench/backend/clients/aliyun_opensearch/config.py,sha256=HvBQAENG5Rfiv6VwbvNh9AdyUXm5YTtqa6FdHyPpllc,1311
  vectordb_bench/backend/clients/alloydb/alloydb.py,sha256=rAV558tyd2hX3jcl3bRcxOkeq__GSAXLxfl3MqkAVkM,13375
- vectordb_bench/backend/clients/alloydb/cli.py,sha256=5g3heAEfuwIHCUIHDU4LYQq-CaQto0sGAdr45jdhuNc,4970
+ vectordb_bench/backend/clients/alloydb/cli.py,sha256=IoRG0A5O0JIDNpKYoFFAr1czz4QZNihUHx4d1QFA7eQ,4974
  vectordb_bench/backend/clients/alloydb/config.py,sha256=JFQMHvBWG1P5T4N7B95o4tMfN4cVqb01I5TNvjDYQuw,5358
  vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py,sha256=O42OU7K7L0KcJ96AphjmgkyN7a220VLsOL0QwghY2aw,8038
  vectordb_bench/backend/clients/aws_opensearch/cli.py,sha256=v1bGoovgokhIGN5tZwb_MrP4af7BfXYQaOpDuy0Ibh0,1327
@@ -29,9 +33,9 @@ vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py,sha256=rWHthqGEpYw
  vectordb_bench/backend/clients/memorydb/cli.py,sha256=BqU5s1CnLCXeHnSOEpQBON8wWMngeLjvnf9-UQqU9cU,2624
  vectordb_bench/backend/clients/memorydb/config.py,sha256=PjhLMMr_LdJ8O91JpHNCCT6HMEGLwH9r_erUMGJEVaI,1501
  vectordb_bench/backend/clients/memorydb/memorydb.py,sha256=XIqtXpY-2lJohIuImFDsRO3c_upn04eCplIOlaLxFo4,10114
- vectordb_bench/backend/clients/milvus/cli.py,sha256=QqzYIOeUSXEvdLH0_YUMhwDHUDJirTNKeUxrJQIqSdw,8506
- vectordb_bench/backend/clients/milvus/config.py,sha256=AZ4QHoufRIjsX2eVrtnug8SeYnuHeBMna_34OQNFxz0,6847
- vectordb_bench/backend/clients/milvus/milvus.py,sha256=7l2ilpZeCVnXLkWjut2EoIhUraYL7qWfSsBrRaq4vuo,7700
+ vectordb_bench/backend/clients/milvus/cli.py,sha256=n3VlULuQQTxDZNN6NJJl3JRzzVfuyJ_AZphl0aoa1Wo,9690
+ vectordb_bench/backend/clients/milvus/config.py,sha256=sol9VCAfbrQI6FKSH2RRZh3JFreSzvkGmUVfpSnBSEw,7425
+ vectordb_bench/backend/clients/milvus/milvus.py,sha256=fA_A0HkVWZRiGOflmcjw6g1RzF3NDjhfhQc3vBafpEQ,7579
  vectordb_bench/backend/clients/pgdiskann/cli.py,sha256=ued1DyufltataIk6KcmBkNp8PdB9Aj65nVJ6WhrD_VI,3130
  vectordb_bench/backend/clients/pgdiskann/config.py,sha256=8E0GLgUxa5LlJ_eXCugbbO08qdbCVqc1wtdsoOsKEW4,4444
  vectordb_bench/backend/clients/pgdiskann/pgdiskann.py,sha256=bEcbpTVSFxRJ5HiJTX77cgu6NqTMPs8qiGeMF7jBC30,12628
@@ -62,10 +66,10 @@ vectordb_bench/backend/clients/zilliz_cloud/config.py,sha256=3Tk7X4r0n2SLzan110x
  vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py,sha256=4JcwiVEJcdEykW6n471nfHeIlmhIDa-gOZ7G5H_4krY,681
  vectordb_bench/backend/runner/__init__.py,sha256=5dZfPky8pY9Bi9HD5GZ3Fge8V2FJWrkGkQUkNL2v1t0,230
  vectordb_bench/backend/runner/mp_runner.py,sha256=sPJJWg6bKSQYsyWEe5y_j8i_Cf9l5buhtyY-wZxXDAI,9080
- vectordb_bench/backend/runner/rate_runner.py,sha256=qLfirLmS9tR0-3jljaWD_AMw_gt6nwhAVVkxhoo4F4A,3195
- vectordb_bench/backend/runner/read_write_runner.py,sha256=B8PD_gRS5K1nFH5004x6ON1Z8TulK7c4QepW3Glltd8,4732
- vectordb_bench/backend/runner/serial_runner.py,sha256=ku1Dtps9JcmwCwZq7eDw0pcP9IN2Zjjg-1VJumXYJpA,9414
- vectordb_bench/backend/runner/util.py,sha256=pGJn-qXWwGXVlmsMulaqH0zXcasDWjsVwwOJeDFWXhc,1032
+ vectordb_bench/backend/runner/rate_runner.py,sha256=UlUOvbvicizNSn7TC0Pf4ni1Z_gaHBuYsu-kiBv4Mec,3963
+ vectordb_bench/backend/runner/read_write_runner.py,sha256=HBCzoA2UU8x7LHj7R27EoQgd8RuRIAj5xuAmP8fslkU,7427
+ vectordb_bench/backend/runner/serial_runner.py,sha256=7ACSCyCmRRXiD-SIZbS_sl3JhEySdXZ-r7uNFhhji8I,9608
+ vectordb_bench/backend/runner/util.py,sha256=C-aELWWsooLG3TlBFd2PGqnQ6Z0_j78_nNk2WCLJyPs,521
  vectordb_bench/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  vectordb_bench/cli/cli.py,sha256=Z2-vLwvnnZFsVAPyjFK557cZZYWX_q60XVJP-aYUGdc,15416
  vectordb_bench/cli/vectordbbench.py,sha256=jbpyjh4xKVRocxg4XurLL3ABUzBRXEChRGYhyqH4ItE,1140
@@ -97,7 +101,7 @@ vectordb_bench/frontend/components/run_test/hideSidebar.py,sha256=vb5kzIMmbMqWX6
  vectordb_bench/frontend/components/run_test/initStyle.py,sha256=osPUgfFfH7rRlVNHSMumvmZxvKWlLxmZiNqgnMiUJEU,723
  vectordb_bench/frontend/components/run_test/submitTask.py,sha256=NCEXfR3xudAncjVEvsV2iaiov5AatGObe830UI6481M,3341
  vectordb_bench/frontend/components/tables/data.py,sha256=pVG_hb4bTMLfUt10NUCJSqcFkPmnN7i9jTw9DcWizpI,1364
- vectordb_bench/frontend/config/dbCaseConfigs.py,sha256=40rJ3YtRkPbZBBlWnV7csXDArqrMVaBw6tLMtxzj3w4,36311
+ vectordb_bench/frontend/config/dbCaseConfigs.py,sha256=SpTfVtAlLsV9zWrg8zxeGCvLNGlCITWKc7xPN7dPUP8,37921
  vectordb_bench/frontend/config/dbPrices.py,sha256=10aBKjVcEg8y7TPSda28opmBM1KmXNrvbU9WM_BsZcE,176
  vectordb_bench/frontend/config/styles.py,sha256=E2PmwmiewxBKJJ59hQ4ZXatqg8QTN-Z53JlsvWMHM2M,2291
  vectordb_bench/frontend/pages/concurrent.py,sha256=z2izkQ0suO5mZ8PpVY2jypZkF5VT8xUkQQEkwd6C-ww,2094
@@ -123,9 +127,9 @@ vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json
  vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json,sha256=-Mdm4By65XDRCrmVOCF8yQXjcZtH4Xo4shcjoDoBUKU,18293
  vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json,sha256=77XlHT5zM_K7mG5HfDQKwXZnSCuR37VUbt6-P3J_amI,15737
  vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json,sha256=TualfJ0664Hs-vdIW68bdkqAEYyzotXmu2P0yIN-GHk,42526
- vectordb_bench-0.0.18.dist-info/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
- vectordb_bench-0.0.18.dist-info/METADATA,sha256=PtTRr7G1PXhzA88gZYML3Y6JPaqzXk_ejeFbvoRVMOQ,34883
- vectordb_bench-0.0.18.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
- vectordb_bench-0.0.18.dist-info/entry_points.txt,sha256=Qzw6gVx96ui8esG21H6yHsI6nboEohRmV424TYhQNrA,113
- vectordb_bench-0.0.18.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
- vectordb_bench-0.0.18.dist-info/RECORD,,
+ vectordb_bench-0.0.19.dist-info/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
+ vectordb_bench-0.0.19.dist-info/METADATA,sha256=Og3-S_xabqf7EyRkgLgr7fE7_nTOyMfwG6BTt-aDJIw,34206
+ vectordb_bench-0.0.19.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ vectordb_bench-0.0.19.dist-info/entry_points.txt,sha256=Qzw6gVx96ui8esG21H6yHsI6nboEohRmV424TYhQNrA,113
+ vectordb_bench-0.0.19.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
+ vectordb_bench-0.0.19.dist-info/RECORD,,