vectordb-bench 0.0.20__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. vectordb_bench/backend/assembler.py +2 -2
  2. vectordb_bench/backend/clients/__init__.py +12 -2
  3. vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +1 -7
  4. vectordb_bench/backend/clients/alloydb/alloydb.py +1 -4
  5. vectordb_bench/backend/clients/api.py +8 -15
  6. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +4 -7
  7. vectordb_bench/backend/clients/chroma/chroma.py +1 -4
  8. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +1 -4
  9. vectordb_bench/backend/clients/memorydb/cli.py +2 -2
  10. vectordb_bench/backend/clients/memorydb/memorydb.py +2 -5
  11. vectordb_bench/backend/clients/milvus/milvus.py +1 -20
  12. vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +1 -4
  13. vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +3 -11
  14. vectordb_bench/backend/clients/pgvector/pgvector.py +2 -7
  15. vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +2 -7
  16. vectordb_bench/backend/clients/pinecone/pinecone.py +1 -4
  17. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +3 -6
  18. vectordb_bench/backend/clients/redis/redis.py +1 -4
  19. vectordb_bench/backend/clients/test/cli.py +1 -1
  20. vectordb_bench/backend/clients/test/test.py +1 -4
  21. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +1 -4
  22. vectordb_bench/backend/data_source.py +4 -12
  23. vectordb_bench/backend/runner/mp_runner.py +16 -34
  24. vectordb_bench/backend/runner/rate_runner.py +4 -4
  25. vectordb_bench/backend/runner/read_write_runner.py +11 -15
  26. vectordb_bench/backend/runner/serial_runner.py +20 -28
  27. vectordb_bench/backend/task_runner.py +6 -26
  28. vectordb_bench/interface.py +10 -19
  29. {vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.21.dist-info}/METADATA +1 -1
  30. {vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.21.dist-info}/RECORD +34 -34
  31. {vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.21.dist-info}/LICENSE +0 -0
  32. {vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.21.dist-info}/WHEEL +0 -0
  33. {vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.21.dist-info}/entry_points.txt +0 -0
  34. {vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.21.dist-info}/top_level.txt +0 -0
@@ -53,8 +53,8 @@ class Assembler:
53
53
  _ = k.init_cls
54
54
 
55
55
  # sort by dataset size
56
- for k, _ in db2runner:
57
- db2runner[k].sort(key=lambda x: x.ca.dataset.data.size)
56
+ for _, runner in db2runner.items():
57
+ runner.sort(key=lambda x: x.ca.dataset.data.size)
58
58
 
59
59
  all_runners = []
60
60
  all_runners.extend(load_runners)
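The loop change above switches to walking `db2runner.items()` so each runner list is sorted in place, keyed by dataset size. A tiny stand-in illustration, with a plain dict and integers replacing the real DB-to-CaseRunner mapping used by Assembler:

    # hypothetical stand-in for Assembler's db2runner: DB -> list of case runners
    db2runner = {"Milvus": [30, 10, 20], "PgVector": [20, 10]}

    # .items() hands back each list directly; sort() mutates it in place
    for _, runners in db2runner.items():
        runners.sort()  # real code sorts by key=lambda x: x.ca.dataset.data.size

    print(db2runner)  # {'Milvus': [10, 20, 30], 'PgVector': [10, 20]}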
@@ -42,7 +42,7 @@ class DB(Enum):
42
42
  AliyunOpenSearch = "AliyunOpenSearch"
43
43
 
44
44
  @property
45
- def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912
45
+ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901
46
46
  """Import while in use"""
47
47
  if self == DB.Milvus:
48
48
  from .milvus.milvus import Milvus
@@ -129,11 +129,16 @@ class DB(Enum):
129
129
 
130
130
  return AliyunOpenSearch
131
131
 
132
+ if self == DB.Test:
133
+ from .test.test import Test
134
+
135
+ return Test
136
+
132
137
  msg = f"Unknown DB: {self.name}"
133
138
  raise ValueError(msg)
134
139
 
135
140
  @property
136
- def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912
141
+ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901
137
142
  """Import while in use"""
138
143
  if self == DB.Milvus:
139
144
  from .milvus.config import MilvusConfig
@@ -220,6 +225,11 @@ class DB(Enum):
220
225
 
221
226
  return AliyunOpenSearchConfig
222
227
 
228
+ if self == DB.Test:
229
+ from .test.config import TestConfig
230
+
231
+ return TestConfig
232
+
223
233
  msg = f"Unknown DB: {self.name}"
224
234
  raise ValueError(msg)
225
235
 
@@ -325,10 +325,7 @@ class AliyunOpenSearch(VectorDB):
325
325
 
326
326
  return False
327
327
 
328
- def optimize(self):
329
- pass
330
-
331
- def optimize_with_size(self, data_size: int):
328
+ def optimize(self, data_size: int):
332
329
  log.info(f"optimize count: {data_size}")
333
330
  retry_times = 0
334
331
  while True:
@@ -340,6 +337,3 @@ class AliyunOpenSearch(VectorDB):
340
337
  if total_count == data_size:
341
338
  log.info("optimize table finish.")
342
339
  return
343
-
344
- def ready_to_load(self):
345
- """ready_to_load will be called before load in load cases."""
@@ -149,10 +149,7 @@ class AlloyDB(VectorDB):
149
149
  )
150
150
  self.conn.commit()
151
151
 
152
- def ready_to_load(self):
153
- pass
154
-
155
- def optimize(self):
152
+ def optimize(self, data_size: int | None = None):
156
153
  self._post_insert()
157
154
 
158
155
  def _post_insert(self):
@@ -137,6 +137,13 @@ class VectorDB(ABC):
137
137
  @contextmanager
138
138
  def init(self) -> None:
139
139
  """create and destory connections to database.
140
+ Why contextmanager:
141
+
142
+ In multiprocessing search tasks, vectordbbench might init
143
+ totally hundreds of thousands of connections with DB server.
144
+
145
+ Too many connections may drain local FDs or server connection resources.
146
+ If the DB client doesn't have `close()` method, just set the object to None.
140
147
 
141
148
  Examples:
142
149
  >>> with self.init():
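The expanded docstring explains the contextmanager choice: each worker process opens its own connection inside `init()` and releases it on exit, mirroring what clients such as MemoryDB do later in this diff. A minimal sketch of a conforming client, assuming a hypothetical `MyClient` class and `my_driver` SDK that are not part of the package:

    from contextlib import contextmanager

    from vectordb_bench.backend.clients.api import VectorDB

    class MyClient(VectorDB):  # hypothetical client, for illustration only
        @contextmanager
        def init(self):
            # one connection per process that enters the context
            self.conn = my_driver.connect(self.uri)  # placeholder driver call
            try:
                yield
            finally:
                # release per-process resources; if the driver has no close(),
                # dropping the reference (self.conn = None) is enough
                self.conn.close()
                self.conn = None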
@@ -187,9 +194,8 @@ class VectorDB(ABC):
187
194
  """
188
195
  raise NotImplementedError
189
196
 
190
- # TODO: remove
191
197
  @abstractmethod
192
- def optimize(self):
198
+ def optimize(self, data_size: int | None = None):
193
199
  """optimize will be called between insertion and search in performance cases.
194
200
 
195
201
  Should be blocked until the vectorDB is ready to be tested on
@@ -199,16 +205,3 @@ class VectorDB(ABC):
199
205
  Optimize's execution time is limited, the limited time is based on cases.
200
206
  """
201
207
  raise NotImplementedError
202
-
203
- def optimize_with_size(self, data_size: int):
204
- self.optimize()
205
-
206
- # TODO: remove
207
- @abstractmethod
208
- def ready_to_load(self):
209
- """ready_to_load will be called before load in load cases.
210
-
211
- Should be blocked until the vectorDB is ready to be tested on
212
- heavy load cases.
213
- """
214
- raise NotImplementedError
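With `ready_to_load()` and `optimize_with_size()` removed from the ABC, a 0.0.21 client implements the single `optimize(data_size)` hook, which the runners now call with the dataset size (see the task_runner.py and read_write_runner.py hunks later in this diff). A minimal sketch, with hypothetical index helpers standing in for a real client's logic:

    from vectordb_bench.backend.clients.api import VectorDB

    class MyClient(VectorDB):  # hypothetical client, for illustration only
        def optimize(self, data_size: int | None = None):
            """Runs between insertion and search; must block until the DB is query-ready."""
            self._build_index()                 # placeholder: create or refresh the vector index
            if data_size is not None:
                self._wait_for_rows(data_size)  # placeholder: poll until data_size rows are searchable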
@@ -145,15 +145,15 @@ class AWSOpenSearch(VectorDB):
145
145
  docvalue_fields=[self.id_col_name],
146
146
  stored_fields="_none_",
147
147
  )
148
- log.info(f'Search took: {resp["took"]}')
149
- log.info(f'Search shards: {resp["_shards"]}')
150
- log.info(f'Search hits total: {resp["hits"]["total"]}')
148
+ log.info(f"Search took: {resp['took']}")
149
+ log.info(f"Search shards: {resp['_shards']}")
150
+ log.info(f"Search hits total: {resp['hits']['total']}")
151
151
  return [int(h["fields"][self.id_col_name][0]) for h in resp["hits"]["hits"]]
152
152
  except Exception as e:
153
153
  log.warning(f"Failed to search: {self.index_name} error: {e!s}")
154
154
  raise e from None
155
155
 
156
- def optimize(self):
156
+ def optimize(self, data_size: int | None = None):
157
157
  """optimize will be called between insertion and search in performance cases."""
158
158
  # Call refresh first to ensure that all segments are created
159
159
  self._refresh_index()
@@ -194,6 +194,3 @@ class AWSOpenSearch(VectorDB):
194
194
  log.info("Calling warmup API to load graphs into memory")
195
195
  warmup_endpoint = f"/_plugins/_knn/warmup/{self.index_name}"
196
196
  self.client.transport.perform_request("GET", warmup_endpoint)
197
-
198
- def ready_to_load(self):
199
- """ready_to_load will be called before load in load cases."""
@@ -57,10 +57,7 @@ class ChromaClient(VectorDB):
57
57
  def ready_to_search(self) -> bool:
58
58
  pass
59
59
 
60
- def ready_to_load(self) -> bool:
61
- pass
62
-
63
- def optimize(self) -> None:
60
+ def optimize(self, data_size: int | None = None):
64
61
  pass
65
62
 
66
63
  def insert_embeddings(
@@ -143,7 +143,7 @@ class ElasticCloud(VectorDB):
143
143
  log.warning(f"Failed to search: {self.indice} error: {e!s}")
144
144
  raise e from None
145
145
 
146
- def optimize(self):
146
+ def optimize(self, data_size: int | None = None):
147
147
  """optimize will be called between insertion and search in performance cases."""
148
148
  assert self.client is not None, "should self.init() first"
149
149
  self.client.indices.refresh(index=self.indice)
@@ -158,6 +158,3 @@ class ElasticCloud(VectorDB):
158
158
  task_status = self.client.tasks.get(task_id=force_merge_task_id)
159
159
  if task_status["completed"]:
160
160
  return
161
-
162
- def ready_to_load(self):
163
- """ready_to_load will be called before load in load cases."""
@@ -43,8 +43,8 @@ class MemoryDBTypedDict(TypedDict):
43
43
  show_default=True,
44
44
  default=False,
45
45
  help=(
46
- "Cluster Mode Disabled (CMD), use this flag when testing locally on a single node instance.",
47
- " In production, MemoryDB only supports cluster mode (CME)",
46
+ "Cluster Mode Disabled (CMD), use this flag when testing locally on a single node instance."
47
+ " In production, MemoryDB only supports cluster mode (CME)"
48
48
  ),
49
49
  ),
50
50
  ]
@@ -157,17 +157,14 @@ class MemoryDB(VectorDB):
157
157
  self.conn = self.get_client()
158
158
  search_param = self.case_config.search_param()
159
159
  if search_param["ef_runtime"]:
160
- self.ef_runtime_str = f'EF_RUNTIME {search_param["ef_runtime"]}'
160
+ self.ef_runtime_str = f"EF_RUNTIME {search_param['ef_runtime']}"
161
161
  else:
162
162
  self.ef_runtime_str = ""
163
163
  yield
164
164
  self.conn.close()
165
165
  self.conn = None
166
166
 
167
- def ready_to_load(self) -> bool:
168
- pass
169
-
170
- def optimize(self) -> None:
167
+ def optimize(self, data_size: int | None = None):
171
168
  self._post_insert()
172
169
 
173
170
  def insert_embeddings(
@@ -138,26 +138,7 @@ class Milvus(VectorDB):
138
138
  log.warning(f"{self.name} optimize error: {e}")
139
139
  raise e from None
140
140
 
141
- def ready_to_load(self):
142
- assert self.col, "Please call self.init() before"
143
- self._pre_load(self.col)
144
-
145
- def _pre_load(self, coll: Collection):
146
- try:
147
- if not coll.has_index(index_name=self._index_name):
148
- log.info(f"{self.name} create index")
149
- coll.create_index(
150
- self._vector_field,
151
- self.case_config.index_param(),
152
- index_name=self._index_name,
153
- )
154
- coll.load()
155
- log.info(f"{self.name} load")
156
- except Exception as e:
157
- log.warning(f"{self.name} pre load error: {e}")
158
- raise e from None
159
-
160
- def optimize(self):
141
+ def optimize(self, data_size: int | None = None):
161
142
  assert self.col, "Please call self.init() before"
162
143
  self._optimize()
163
144
 
@@ -143,10 +143,7 @@ class PgDiskANN(VectorDB):
143
143
  )
144
144
  self.conn.commit()
145
145
 
146
- def ready_to_load(self):
147
- pass
148
-
149
- def optimize(self):
146
+ def optimize(self, data_size: int | None = None):
150
147
  self._post_insert()
151
148
 
152
149
  def _post_insert(self):
@@ -153,10 +153,7 @@ class PgVectoRS(VectorDB):
153
153
  )
154
154
  self.conn.commit()
155
155
 
156
- def ready_to_load(self):
157
- pass
158
-
159
- def optimize(self):
156
+ def optimize(self, data_size: int | None = None):
160
157
  self._post_insert()
161
158
 
162
159
  def _post_insert(self):
@@ -200,10 +197,7 @@ class PgVectoRS(VectorDB):
200
197
  self.cursor.execute(index_create_sql)
201
198
  self.conn.commit()
202
199
  except Exception as e:
203
- log.warning(
204
- f"Failed to create pgvecto.rs index {self._index_name} \
205
- at table {self.table_name} error: {e}",
206
- )
200
+ log.warning(f"Failed to create pgvecto.rs index {self._index_name} at table {self.table_name} error: {e}")
207
201
  raise e from None
208
202
 
209
203
  def _create_table(self, dim: int):
@@ -258,9 +252,7 @@ class PgVectoRS(VectorDB):
258
252
 
259
253
  return len(metadata), None
260
254
  except Exception as e:
261
- log.warning(
262
- f"Failed to insert data into pgvecto.rs table ({self.table_name}), error: {e}",
263
- )
255
+ log.warning(f"Failed to insert data into pgvecto.rs table ({self.table_name}), error: {e}")
264
256
  return 0, e
265
257
 
266
258
  def search_embedding(
@@ -228,10 +228,7 @@ class PgVector(VectorDB):
228
228
  )
229
229
  self.conn.commit()
230
230
 
231
- def ready_to_load(self):
232
- pass
233
-
234
- def optimize(self):
231
+ def optimize(self, data_size: int | None = None):
235
232
  self._post_insert()
236
233
 
237
234
  def _post_insert(self):
@@ -415,9 +412,7 @@ class PgVector(VectorDB):
415
412
 
416
413
  return len(metadata), None
417
414
  except Exception as e:
418
- log.warning(
419
- f"Failed to insert data into pgvector table ({self.table_name}), error: {e}",
420
- )
415
+ log.warning(f"Failed to insert data into pgvector table ({self.table_name}), error: {e}")
421
416
  return 0, e
422
417
 
423
418
  def search_embedding(
@@ -143,10 +143,7 @@ class PgVectorScale(VectorDB):
143
143
  )
144
144
  self.conn.commit()
145
145
 
146
- def ready_to_load(self):
147
- pass
148
-
149
- def optimize(self):
146
+ def optimize(self, data_size: int | None = None):
150
147
  self._post_insert()
151
148
 
152
149
  def _post_insert(self):
@@ -255,9 +252,7 @@ class PgVectorScale(VectorDB):
255
252
 
256
253
  return len(metadata), None
257
254
  except Exception as e:
258
- log.warning(
259
- f"Failed to insert data into pgvector table ({self.table_name}), error: {e}",
260
- )
255
+ log.warning(f"Failed to insert data into pgvector table ({self.table_name}), error: {e}")
261
256
  return 0, e
262
257
 
263
258
  def search_embedding(
@@ -59,10 +59,7 @@ class Pinecone(VectorDB):
59
59
  self.index = pc.Index(self.index_name)
60
60
  yield
61
61
 
62
- def ready_to_load(self):
63
- pass
64
-
65
- def optimize(self):
62
+ def optimize(self, data_size: int | None = None):
66
63
  pass
67
64
 
68
65
  def insert_embeddings(
@@ -62,10 +62,7 @@ class QdrantCloud(VectorDB):
62
62
  self.qdrant_client = None
63
63
  del self.qdrant_client
64
64
 
65
- def ready_to_load(self):
66
- pass
67
-
68
- def optimize(self):
65
+ def optimize(self, data_size: int | None = None):
69
66
  assert self.qdrant_client, "Please call self.init() before"
70
67
  # wait for vectors to be fully indexed
71
68
  try:
@@ -76,8 +73,8 @@ class QdrantCloud(VectorDB):
76
73
  continue
77
74
  if info.status == CollectionStatus.GREEN:
78
75
  msg = (
79
- f"Stored vectors: {info.vectors_count}, Indexed vectors: {info.indexed_vectors_count}, ",
80
- f"Collection status: {info.indexed_vectors_count}",
76
+ f"Stored vectors: {info.vectors_count}, Indexed vectors: {info.indexed_vectors_count}, "
77
+ f"Collection status: {info.indexed_vectors_count}"
81
78
  )
82
79
  log.info(msg)
83
80
  return
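Most of the message-formatting churn in this release is the same one-character fix: dropping a trailing comma between adjacent f-strings. With the comma, the parenthesized value is a tuple (or, inside a log.info(...) call, the second string is passed on as a %-formatting argument) instead of one concatenated string. A small standalone illustration, using a plain dict in place of the Qdrant collection info object:

    info = {"vectors_count": 10, "indexed_vectors_count": 10}

    # buggy shape: the trailing comma builds a 2-tuple, so logging prints a tuple repr
    msg = (
        f"Stored vectors: {info['vectors_count']}, ",
        f"Collection status: {info['indexed_vectors_count']}",
    )
    print(type(msg).__name__)  # tuple

    # fixed shape: adjacent string literals are concatenated into a single str
    msg = (
        f"Stored vectors: {info['vectors_count']}, "
        f"Collection status: {info['indexed_vectors_count']}"
    )
    print(type(msg).__name__)  # str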
@@ -95,10 +95,7 @@ class Redis(VectorDB):
95
95
  def ready_to_search(self) -> bool:
96
96
  """Check if the database is ready to search."""
97
97
 
98
- def ready_to_load(self) -> bool:
99
- pass
100
-
101
- def optimize(self) -> None:
98
+ def optimize(self, data_size: int | None = None):
102
99
  pass
103
100
 
104
101
  def insert_embeddings(
@@ -17,7 +17,7 @@ class TestTypedDict(CommonTypedDict): ...
17
17
  @click_parameter_decorators_from_typed_dict(TestTypedDict)
18
18
  def Test(**parameters: Unpack[TestTypedDict]):
19
19
  run(
20
- db=DB.NewClient,
20
+ db=DB.Test,
21
21
  db_config=TestConfig(db_label=parameters["db_label"]),
22
22
  db_case_config=TestIndexConfig(),
23
23
  **parameters,
@@ -33,10 +33,7 @@ class Test(VectorDB):
33
33
 
34
34
  yield
35
35
 
36
- def ready_to_load(self) -> bool:
37
- return True
38
-
39
- def optimize(self) -> None:
36
+ def optimize(self, data_size: int | None = None):
40
37
  pass
41
38
 
42
39
  def insert_embeddings(
@@ -67,10 +67,7 @@ class WeaviateCloud(VectorDB):
67
67
  self.client = None
68
68
  del self.client
69
69
 
70
- def ready_to_load(self):
71
- """Should call insert first, do nothing"""
72
-
73
- def optimize(self):
70
+ def optimize(self, data_size: int | None = None):
74
71
  assert self.client.schema.exists(self.collection_name)
75
72
  self.client.schema.update_config(
76
73
  self.collection_name,
@@ -63,9 +63,7 @@ class AliyunOSSReader(DatasetReader):
63
63
  # check size equal
64
64
  remote_size, local_size = info.content_length, local.stat().st_size
65
65
  if remote_size != local_size:
66
- log.info(
67
- f"local file: {local} size[{local_size}] not match with remote size[{remote_size}]",
68
- )
66
+ log.info(f"local file: {local} size[{local_size}] not match with remote size[{remote_size}]")
69
67
  return False
70
68
 
71
69
  return True
@@ -89,9 +87,7 @@ class AliyunOSSReader(DatasetReader):
89
87
  local_file = local_ds_root.joinpath(file)
90
88
 
91
89
  if (not local_file.exists()) or (not self.validate_file(remote_file, local_file)):
92
- log.info(
93
- f"local file: {local_file} not match with remote: {remote_file}; add to downloading list",
94
- )
90
+ log.info(f"local file: {local_file} not match with remote: {remote_file}; add to downloading list")
95
91
  downloads.append((remote_file, local_file))
96
92
 
97
93
  if len(downloads) == 0:
@@ -135,9 +131,7 @@ class AwsS3Reader(DatasetReader):
135
131
  local_file = local_ds_root.joinpath(file)
136
132
 
137
133
  if (not local_file.exists()) or (not self.validate_file(remote_file, local_file)):
138
- log.info(
139
- f"local file: {local_file} not match with remote: {remote_file}; add to downloading list",
140
- )
134
+ log.info(f"local file: {local_file} not match with remote: {remote_file}; add to downloading list")
141
135
  downloads.append(remote_file)
142
136
 
143
137
  if len(downloads) == 0:
@@ -157,9 +151,7 @@ class AwsS3Reader(DatasetReader):
157
151
  # check size equal
158
152
  remote_size, local_size = info.get("size"), local.stat().st_size
159
153
  if remote_size != local_size:
160
- log.info(
161
- f"local file: {local} size[{local_size}] not match with remote size[{remote_size}]",
162
- )
154
+ log.info(f"local file: {local} size[{local_size}] not match with remote size[{remote_size}]")
163
155
  return False
164
156
 
165
157
  return True
@@ -79,14 +79,14 @@ class MultiProcessingSearchRunner:
79
79
 
80
80
  if count % 500 == 0:
81
81
  log.debug(
82
- f"({mp.current_process().name:16}) ",
83
- f"search_count: {count}, latest_latency={time.perf_counter()-s}",
82
+ f"({mp.current_process().name:16}) "
83
+ f"search_count: {count}, latest_latency={time.perf_counter()-s}"
84
84
  )
85
85
 
86
86
  total_dur = round(time.perf_counter() - start_time, 4)
87
87
  log.info(
88
88
  f"{mp.current_process().name:16} search {self.duration}s: "
89
- f"actual_dur={total_dur}s, count={count}, qps in this process: {round(count / total_dur, 4):3}",
89
+ f"actual_dur={total_dur}s, count={count}, qps in this process: {round(count / total_dur, 4):3}"
90
90
  )
91
91
 
92
92
  return (count, total_dur, latencies)
@@ -94,9 +94,7 @@ class MultiProcessingSearchRunner:
94
94
  @staticmethod
95
95
  def get_mp_context():
96
96
  mp_start_method = "spawn"
97
- log.debug(
98
- f"MultiProcessingSearchRunner get multiprocessing start method: {mp_start_method}",
99
- )
97
+ log.debug(f"MultiProcessingSearchRunner get multiprocessing start method: {mp_start_method}")
100
98
  return mp.get_context(mp_start_method)
101
99
 
102
100
  def _run_all_concurrencies_mem_efficient(self):
@@ -113,9 +111,7 @@ class MultiProcessingSearchRunner:
113
111
  mp_context=self.get_mp_context(),
114
112
  max_workers=conc,
115
113
  ) as executor:
116
- log.info(
117
- f"Start search {self.duration}s in concurrency {conc}, filters: {self.filters}",
118
- )
114
+ log.info(f"Start search {self.duration}s in concurrency {conc}, filters: {self.filters}")
119
115
  future_iter = [executor.submit(self.search, self.test_data, q, cond) for i in range(conc)]
120
116
  # Sync all processes
121
117
  while q.qsize() < conc:
@@ -124,9 +120,7 @@ class MultiProcessingSearchRunner:
124
120
 
125
121
  with cond:
126
122
  cond.notify_all()
127
- log.info(
128
- f"Syncing all process and start concurrency search, concurrency={conc}",
129
- )
123
+ log.info(f"Syncing all process and start concurrency search, concurrency={conc}")
130
124
 
131
125
  start = time.perf_counter()
132
126
  all_count = sum([r.result()[0] for r in future_iter])
@@ -140,18 +134,14 @@ class MultiProcessingSearchRunner:
140
134
  conc_qps_list.append(qps)
141
135
  conc_latency_p99_list.append(latency_p99)
142
136
  conc_latency_avg_list.append(latency_avg)
143
- log.info(
144
- f"End search in concurrency {conc}: dur={cost}s, total_count={all_count}, qps={qps}",
145
- )
137
+ log.info(f"End search in concurrency {conc}: dur={cost}s, total_count={all_count}, qps={qps}")
146
138
 
147
139
  if qps > max_qps:
148
140
  max_qps = qps
149
- log.info(
150
- f"Update largest qps with concurrency {conc}: current max_qps={max_qps}",
151
- )
141
+ log.info(f"Update largest qps with concurrency {conc}: current max_qps={max_qps}")
152
142
  except Exception as e:
153
143
  log.warning(
154
- f"Fail to search all concurrencies: {self.concurrencies}, max_qps before failure={max_qps}, reason={e}",
144
+ f"Fail to search, concurrencies: {self.concurrencies}, max_qps before failure={max_qps}, reason={e}"
155
145
  )
156
146
  traceback.print_exc()
157
147
 
@@ -193,9 +183,7 @@ class MultiProcessingSearchRunner:
193
183
  mp_context=self.get_mp_context(),
194
184
  max_workers=conc,
195
185
  ) as executor:
196
- log.info(
197
- f"Start search_by_dur {duration}s in concurrency {conc}, filters: {self.filters}",
198
- )
186
+ log.info(f"Start search_by_dur {duration}s in concurrency {conc}, filters: {self.filters}")
199
187
  future_iter = [
200
188
  executor.submit(self.search_by_dur, duration, self.test_data, q, cond) for i in range(conc)
201
189
  ]
@@ -206,24 +194,18 @@ class MultiProcessingSearchRunner:
206
194
 
207
195
  with cond:
208
196
  cond.notify_all()
209
- log.info(
210
- f"Syncing all process and start concurrency search, concurrency={conc}",
211
- )
197
+ log.info(f"Syncing all process and start concurrency search, concurrency={conc}")
212
198
 
213
199
  start = time.perf_counter()
214
200
  all_count = sum([r.result() for r in future_iter])
215
201
  cost = time.perf_counter() - start
216
202
 
217
203
  qps = round(all_count / cost, 4)
218
- log.info(
219
- f"End search in concurrency {conc}: dur={cost}s, total_count={all_count}, qps={qps}",
220
- )
204
+ log.info(f"End search in concurrency {conc}: dur={cost}s, total_count={all_count}, qps={qps}")
221
205
 
222
206
  if qps > max_qps:
223
207
  max_qps = qps
224
- log.info(
225
- f"Update largest qps with concurrency {conc}: current max_qps={max_qps}",
226
- )
208
+ log.info(f"Update largest qps with concurrency {conc}: current max_qps={max_qps}")
227
209
  except Exception as e:
228
210
  log.warning(
229
211
  f"Fail to search all concurrencies: {self.concurrencies}, max_qps before failure={max_qps}, reason={e}",
@@ -275,14 +257,14 @@ class MultiProcessingSearchRunner:
275
257
 
276
258
  if count % 500 == 0:
277
259
  log.debug(
278
- f"({mp.current_process().name:16}) search_count: {count}, ",
279
- f"latest_latency={time.perf_counter()-s}",
260
+ f"({mp.current_process().name:16}) search_count: {count}, "
261
+ f"latest_latency={time.perf_counter()-s}"
280
262
  )
281
263
 
282
264
  total_dur = round(time.perf_counter() - start_time, 4)
283
265
  log.debug(
284
266
  f"{mp.current_process().name:16} search {self.duration}s: "
285
- f"actual_dur={total_dur}s, count={count}, qps in this process: {round(count / total_dur, 4):3}",
267
+ f"actual_dur={total_dur}s, count={count}, qps in this process: {round(count / total_dur, 4):3}"
286
268
  )
287
269
 
288
270
  return count
@@ -73,14 +73,14 @@ class RatedMultiThreadingInsertRunner:
73
73
 
74
74
  if len(not_done) > 0:
75
75
  log.warning(
76
- f"Failed to finish all tasks in 1s, [{len(not_done)}/{len(executing_futures)}] ",
77
- f"tasks are not done, waited={wait_interval:.2f}, trying to wait in the next round",
76
+ f"Failed to finish all tasks in 1s, [{len(not_done)}/{len(executing_futures)}] "
77
+ f"tasks are not done, waited={wait_interval:.2f}, trying to wait in the next round"
78
78
  )
79
79
  executing_futures = list(not_done)
80
80
  else:
81
81
  log.debug(
82
- f"Finished {len(executing_futures)} insert-{config.NUM_PER_BATCH} ",
83
- f"task in 1s, wait_interval={wait_interval:.2f}",
82
+ f"Finished {len(executing_futures)} insert-{config.NUM_PER_BATCH} "
83
+ f"task in 1s, wait_interval={wait_interval:.2f}"
84
84
  )
85
85
  executing_futures = []
86
86
  except Exception as e:
@@ -45,8 +45,8 @@ class ReadWriteRunner(MultiProcessingSearchRunner, RatedMultiThreadingInsertRunn
45
45
  self.read_dur_after_write = read_dur_after_write
46
46
 
47
47
  log.info(
48
- f"Init runner, concurencys={concurrencies}, search_stage={search_stage}, ",
49
- f"stage_search_dur={read_dur_after_write}",
48
+ f"Init runner, concurencys={concurrencies}, search_stage={search_stage}, "
49
+ f"stage_search_dur={read_dur_after_write}"
50
50
  )
51
51
 
52
52
  test_emb = np.stack(dataset.test_data["emb"])
@@ -80,7 +80,7 @@ class ReadWriteRunner(MultiProcessingSearchRunner, RatedMultiThreadingInsertRunn
80
80
  """Optimize needs to run in differenct process for pymilvus schema recursion problem"""
81
81
  with self.db.init():
82
82
  log.info("Search after write - Optimize start")
83
- self.db.optimize()
83
+ self.db.optimize(data_size=self.data_volume)
84
84
  log.info("Search after write - Optimize finished")
85
85
 
86
86
  def run_search(self):
@@ -88,12 +88,10 @@ class ReadWriteRunner(MultiProcessingSearchRunner, RatedMultiThreadingInsertRunn
88
88
  res, ssearch_dur = self.serial_search_runner.run()
89
89
  recall, ndcg, p99_latency = res
90
90
  log.info(
91
- f"Search after write - Serial search - recall={recall}, ndcg={ndcg}, p99={p99_latency}, ",
91
+ f"Search after write - Serial search - recall={recall}, ndcg={ndcg}, p99={p99_latency}, "
92
92
  f"dur={ssearch_dur:.4f}",
93
93
  )
94
- log.info(
95
- f"Search after wirte - Conc search start, dur for each conc={self.read_dur_after_write}",
96
- )
94
+ log.info(f"Search after wirte - Conc search start, dur for each conc={self.read_dur_after_write}")
97
95
  max_qps = self.run_by_dur(self.read_dur_after_write)
98
96
  log.info(f"Search after wirte - Conc search finished, max_qps={max_qps}")
99
97
 
@@ -157,9 +155,7 @@ class ReadWriteRunner(MultiProcessingSearchRunner, RatedMultiThreadingInsertRunn
157
155
 
158
156
  got = wait_next_target(start_batch, target_batch)
159
157
  if got is False:
160
- log.warning(
161
- f"Abnormal exit, target_batch={target_batch}, start_batch={start_batch}",
162
- )
158
+ log.warning(f"Abnormal exit, target_batch={target_batch}, start_batch={start_batch}")
163
159
  return None
164
160
 
165
161
  log.info(f"Insert {perc}% done, total batch={total_batch}")
@@ -167,8 +163,8 @@ class ReadWriteRunner(MultiProcessingSearchRunner, RatedMultiThreadingInsertRunn
167
163
  res, ssearch_dur = self.serial_search_runner.run()
168
164
  recall, ndcg, p99_latency = res
169
165
  log.info(
170
- f"[{target_batch}/{total_batch}] Serial search - {perc}% done, recall={recall}, ",
171
- f"ndcg={ndcg}, p99={p99_latency}, dur={ssearch_dur:.4f}",
166
+ f"[{target_batch}/{total_batch}] Serial search - {perc}% done, recall={recall}, "
167
+ f"ndcg={ndcg}, p99={p99_latency}, dur={ssearch_dur:.4f}"
172
168
  )
173
169
 
174
170
  # Search duration for non-last search stage is carefully calculated.
@@ -183,8 +179,8 @@ class ReadWriteRunner(MultiProcessingSearchRunner, RatedMultiThreadingInsertRunn
183
179
  each_conc_search_dur = csearch_dur / len(self.concurrencies)
184
180
  if each_conc_search_dur < 30:
185
181
  warning_msg = (
186
- f"Results might be inaccurate, duration[{csearch_dur:.4f}] left for conc-search is too short, ",
187
- f"total available dur={total_dur_between_stages}, serial_search_cost={ssearch_dur}.",
182
+ f"Results might be inaccurate, duration[{csearch_dur:.4f}] left for conc-search is too short, "
183
+ f"total available dur={total_dur_between_stages}, serial_search_cost={ssearch_dur}."
188
184
  )
189
185
  log.warning(warning_msg)
190
186
 
@@ -193,7 +189,7 @@ class ReadWriteRunner(MultiProcessingSearchRunner, RatedMultiThreadingInsertRunn
193
189
  each_conc_search_dur = 60
194
190
 
195
191
  log.info(
196
- f"[{target_batch}/{total_batch}] Concurrent search - {perc}% start, dur={each_conc_search_dur:.4f}",
192
+ f"[{target_batch}/{total_batch}] Concurrent search - {perc}% start, dur={each_conc_search_dur:.4f}"
197
193
  )
198
194
  max_qps = self.run_by_dur(each_conc_search_dur)
199
195
  result.append((perc, max_qps, recall, ndcg, p99_latency))
@@ -40,9 +40,7 @@ class SerialInsertRunner:
40
40
  def task(self) -> int:
41
41
  count = 0
42
42
  with self.db.init():
43
- log.info(
44
- f"({mp.current_process().name:16}) Start inserting embeddings in batch {config.NUM_PER_BATCH}",
45
- )
43
+ log.info(f"({mp.current_process().name:16}) Start inserting embeddings in batch {config.NUM_PER_BATCH}")
46
44
  start = time.perf_counter()
47
45
  for data_df in self.dataset:
48
46
  all_metadata = data_df["id"].tolist()
@@ -66,13 +64,11 @@ class SerialInsertRunner:
66
64
  assert insert_count == len(all_metadata)
67
65
  count += insert_count
68
66
  if count % 100_000 == 0:
69
- log.info(
70
- f"({mp.current_process().name:16}) Loaded {count} embeddings into VectorDB",
71
- )
67
+ log.info(f"({mp.current_process().name:16}) Loaded {count} embeddings into VectorDB")
72
68
 
73
69
  log.info(
74
- f"({mp.current_process().name:16}) Finish loading all dataset into VectorDB, ",
75
- f"dur={time.perf_counter()-start}",
70
+ f"({mp.current_process().name:16}) Finish loading all dataset into VectorDB, "
71
+ f"dur={time.perf_counter() - start}"
76
72
  )
77
73
  return count
78
74
 
@@ -83,8 +79,8 @@ class SerialInsertRunner:
83
79
 
84
80
  num_batches = math.ceil(len(all_embeddings) / NUM_PER_BATCH)
85
81
  log.info(
86
- f"({mp.current_process().name:16}) Start inserting {len(all_embeddings)} ",
87
- f"embeddings in batch {NUM_PER_BATCH}",
82
+ f"({mp.current_process().name:16}) Start inserting {len(all_embeddings)} "
83
+ f"embeddings in batch {NUM_PER_BATCH}"
88
84
  )
89
85
  count = 0
90
86
  for batch_id in range(num_batches):
@@ -94,8 +90,8 @@ class SerialInsertRunner:
94
90
  embeddings = all_embeddings[batch_id * NUM_PER_BATCH : (batch_id + 1) * NUM_PER_BATCH]
95
91
 
96
92
  log.debug(
97
- f"({mp.current_process().name:16}) batch [{batch_id:3}/{num_batches}], ",
98
- f"Start inserting {len(metadata)} embeddings",
93
+ f"({mp.current_process().name:16}) batch [{batch_id:3}/{num_batches}], "
94
+ f"Start inserting {len(metadata)} embeddings"
99
95
  )
100
96
  while retry_count < LOAD_MAX_TRY_COUNT:
101
97
  insert_count, error = self.db.insert_embeddings(
@@ -113,15 +109,15 @@ class SerialInsertRunner:
113
109
  else:
114
110
  break
115
111
  log.debug(
116
- f"({mp.current_process().name:16}) batch [{batch_id:3}/{num_batches}], ",
117
- f"Finish inserting {len(metadata)} embeddings",
112
+ f"({mp.current_process().name:16}) batch [{batch_id:3}/{num_batches}], "
113
+ f"Finish inserting {len(metadata)} embeddings"
118
114
  )
119
115
 
120
116
  assert already_insert_count == len(metadata)
121
117
  count += already_insert_count
122
118
  log.info(
123
- f"({mp.current_process().name:16}) Finish inserting {len(all_embeddings)} embeddings in ",
124
- f"batch {NUM_PER_BATCH}",
119
+ f"({mp.current_process().name:16}) Finish inserting {len(all_embeddings)} embeddings in "
120
+ f"batch {NUM_PER_BATCH}"
125
121
  )
126
122
  return count
127
123
 
@@ -160,8 +156,6 @@ class SerialInsertRunner:
160
156
  start_time = time.perf_counter()
161
157
  max_load_count, times = 0, 0
162
158
  try:
163
- with self.db.init():
164
- self.db.ready_to_load()
165
159
  while time.perf_counter() - start_time < self.timeout:
166
160
  count = self.endless_insert_data(
167
161
  all_embeddings,
@@ -171,13 +165,13 @@ class SerialInsertRunner:
171
165
  max_load_count += count
172
166
  times += 1
173
167
  log.info(
174
- f"Loaded {times} entire dataset, current max load counts={utils.numerize(max_load_count)}, ",
175
- f"{max_load_count}",
168
+ f"Loaded {times} entire dataset, current max load counts={utils.numerize(max_load_count)}, "
169
+ f"{max_load_count}"
176
170
  )
177
171
  except Exception as e:
178
172
  log.info(
179
- f"Capacity case load reach limit, insertion counts={utils.numerize(max_load_count)}, ",
180
- f"{max_load_count}, err={e}",
173
+ f"Capacity case load reach limit, insertion counts={utils.numerize(max_load_count)}, "
174
+ f"{max_load_count}, err={e}"
181
175
  )
182
176
  traceback.print_exc()
183
177
  return max_load_count
@@ -209,9 +203,7 @@ class SerialSearchRunner:
209
203
  self.ground_truth = ground_truth
210
204
 
211
205
  def search(self, args: tuple[list, pd.DataFrame]) -> tuple[float, float, float]:
212
- log.info(
213
- f"{mp.current_process().name:14} start search the entire test_data to get recall and latency",
214
- )
206
+ log.info(f"{mp.current_process().name:14} start search the entire test_data to get recall and latency")
215
207
  with self.db.init():
216
208
  test_data, ground_truth = args
217
209
  ideal_dcg = get_ideal_dcg(self.k)
@@ -242,8 +234,8 @@ class SerialSearchRunner:
242
234
 
243
235
  if len(latencies) % 100 == 0:
244
236
  log.debug(
245
- f"({mp.current_process().name:14}) search_count={len(latencies):3}, ",
246
- f"latest_latency={latencies[-1]}, latest recall={recalls[-1]}",
237
+ f"({mp.current_process().name:14}) search_count={len(latencies):3}, "
238
+ f"latest_latency={latencies[-1]}, latest recall={recalls[-1]}"
247
239
  )
248
240
 
249
241
  avg_latency = round(np.mean(latencies), 4)
@@ -258,7 +250,7 @@ class SerialSearchRunner:
258
250
  f"avg_recall={avg_recall}, "
259
251
  f"avg_ndcg={avg_ndcg},"
260
252
  f"avg_latency={avg_latency}, "
261
- f"p99={p99}",
253
+ f"p99={p99}"
262
254
  )
263
255
  return (avg_recall, avg_ndcg, p99)
264
256
 
@@ -98,9 +98,7 @@ class CaseRunner(BaseModel):
98
98
  self.init_db(drop_old)
99
99
  self.ca.dataset.prepare(self.dataset_source, filters=self.ca.filter_rate)
100
100
  except ModuleNotFoundError as e:
101
- log.warning(
102
- f"pre run case error: please install client for db: {self.config.db}, error={e}",
103
- )
101
+ log.warning(f"pre run case error: please install client for db: {self.config.db}, error={e}")
104
102
  raise e from None
105
103
 
106
104
  def run(self, drop_old: bool = True) -> Metric:
@@ -136,9 +134,7 @@ class CaseRunner(BaseModel):
136
134
  log.warning(f"Failed to run capacity case, reason = {e}")
137
135
  raise e from None
138
136
  else:
139
- log.info(
140
- f"Capacity case loading dataset reaches VectorDB's limit: max capacity = {count}",
141
- )
137
+ log.info(f"Capacity case loading dataset reaches VectorDB's limit: max capacity = {count}")
142
138
  return Metric(max_load_count=count)
143
139
 
144
140
  def _run_perf_case(self, drop_old: bool = True) -> Metric:
@@ -147,22 +143,6 @@ class CaseRunner(BaseModel):
147
143
  Returns:
148
144
  Metric: load_duration, recall, serial_latency_p99, and, qps
149
145
  """
150
- """
151
- if drop_old:
152
- _, load_dur = self._load_train_data()
153
- build_dur = self._optimize()
154
- m.load_duration = round(load_dur+build_dur, 4)
155
- log.info(
156
- f"Finish loading the entire dataset into VectorDB,"
157
- f" insert_duration={load_dur}, optimize_duration={build_dur}"
158
- f" load_duration(insert + optimize) = {m.load_duration}"
159
- )
160
-
161
- self._init_search_runner()
162
-
163
- m.qps, m.conc_num_list, m.conc_qps_list, m.conc_latency_p99_list = self._conc_search()
164
- m.recall, m.serial_latency_p99 = self._serial_search()
165
- """
166
146
 
167
147
  log.info("Start performance case")
168
148
  try:
@@ -175,7 +155,7 @@ class CaseRunner(BaseModel):
175
155
  log.info(
176
156
  f"Finish loading the entire dataset into VectorDB,"
177
157
  f" insert_duration={load_dur}, optimize_duration={build_dur}"
178
- f" load_duration(insert + optimize) = {m.load_duration}",
158
+ f" load_duration(insert + optimize) = {m.load_duration}"
179
159
  )
180
160
  else:
181
161
  log.info("Data loading skipped")
@@ -254,13 +234,13 @@ class CaseRunner(BaseModel):
254
234
  self.stop()
255
235
 
256
236
  @utils.time_it
257
- def _task(self) -> None:
237
+ def _optimize_task(self) -> None:
258
238
  with self.db.init():
259
- self.db.optimize_with_size(data_size=self.ca.dataset.data.size)
239
+ self.db.optimize(data_size=self.ca.dataset.data.size)
260
240
 
261
241
  def _optimize(self) -> float:
262
242
  with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
263
- future = executor.submit(self._task)
243
+ future = executor.submit(self._optimize_task)
264
244
  try:
265
245
  return future.result(timeout=self.ca.optimize_timeout)[1]
266
246
  except TimeoutError as e:
@@ -65,9 +65,7 @@ class BenchMarkRunner:
65
65
  log.warning("Empty tasks submitted")
66
66
  return False
67
67
 
68
- log.debug(
69
- f"tasks: {tasks}, task_label: {task_label}, dataset source: {self.dataset_source}",
70
- )
68
+ log.debug(f"tasks: {tasks}, task_label: {task_label}, dataset source: {self.dataset_source}")
71
69
 
72
70
  # Generate run_id
73
71
  run_id = uuid.uuid4().hex
@@ -169,14 +167,13 @@ class BenchMarkRunner:
169
167
  drop_old = TaskStage.DROP_OLD in runner.config.stages
170
168
  if (latest_runner and runner == latest_runner) or not self.drop_old:
171
169
  drop_old = False
170
+ num_cases = running_task.num_cases()
172
171
  try:
173
- log.info(
174
- f"[{idx+1}/{running_task.num_cases()}] start case: {runner.display()}, drop_old={drop_old}",
175
- )
172
+ log.info(f"[{idx+1}/{num_cases}] start case: {runner.display()}, drop_old={drop_old}")
176
173
  case_res.metrics = runner.run(drop_old)
177
174
  log.info(
178
- f"[{idx+1}/{running_task.num_cases()}] finish case: {runner.display()}, "
179
- f"result={case_res.metrics}, label={case_res.label}",
175
+ f"[{idx+1}/{num_cases}] finish case: {runner.display()}, "
176
+ f"result={case_res.metrics}, label={case_res.label}"
180
177
  )
181
178
 
182
179
  # cache the latest succeeded runner
@@ -189,16 +186,12 @@ class BenchMarkRunner:
189
186
  if not drop_old:
190
187
  case_res.metrics.load_duration = cached_load_duration if cached_load_duration else 0.0
191
188
  except (LoadTimeoutError, PerformanceTimeoutError) as e:
192
- log.warning(
193
- f"[{idx+1}/{running_task.num_cases()}] case {runner.display()} failed to run, reason={e}",
194
- )
189
+ log.warning(f"[{idx+1}/{num_cases}] case {runner.display()} failed to run, reason={e}")
195
190
  case_res.label = ResultLabel.OUTOFRANGE
196
191
  continue
197
192
 
198
193
  except Exception as e:
199
- log.warning(
200
- f"[{idx+1}/{running_task.num_cases()}] case {runner.display()} failed to run, reason={e}",
201
- )
194
+ log.warning(f"[{idx+1}/{num_cases}] case {runner.display()} failed to run, reason={e}")
202
195
  traceback.print_exc()
203
196
  case_res.label = ResultLabel.FAILED
204
197
  continue
@@ -217,9 +210,7 @@ class BenchMarkRunner:
217
210
 
218
211
  send_conn.send((SIGNAL.SUCCESS, None))
219
212
  send_conn.close()
220
- log.info(
221
- f"Success to finish task: label={running_task.task_label}, run_id={running_task.run_id}",
222
- )
213
+ log.info(f"Success to finish task: label={running_task.task_label}, run_id={running_task.run_id}")
223
214
 
224
215
  except Exception as e:
225
216
  err_msg = (
@@ -249,8 +240,8 @@ class BenchMarkRunner:
249
240
 
250
241
  def _run_async(self, conn: Connection) -> bool:
251
242
  log.info(
252
- f"task submitted: id={self.running_task.run_id}, {self.running_task.task_label}, ",
253
- f"case number: {len(self.running_task.case_runners)}",
243
+ f"task submitted: id={self.running_task.run_id}, {self.running_task.task_label}, "
244
+ f"case number: {len(self.running_task.case_runners)}"
254
245
  )
255
246
  global global_result_future
256
247
  executor = concurrent.futures.ProcessPoolExecutor(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: vectordb-bench
3
- Version: 0.0.20
3
+ Version: 0.0.21
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -1,74 +1,74 @@
1
1
  vectordb_bench/__init__.py,sha256=d5psAfISw9F6PFL2xPlSYUKKFDw7ifQm7g3LWC8_yUA,2375
2
2
  vectordb_bench/__main__.py,sha256=cyYbVSU-zA1AgzneGKcRRuzR4ftRDr9sIi9Ei9NZnhI,858
3
3
  vectordb_bench/base.py,sha256=AgavIF0P9ku_RmCRk1KKziba-wI4ZpA2aJvjJzNhRSs,129
4
- vectordb_bench/interface.py,sha256=8iTNV2oq0LAU-8y3DbeEfzVex03d8M5kves4OXs_ubY,10016
4
+ vectordb_bench/interface.py,sha256=XaCjTgUeI17uVjsgOauPeVlkvnkuCyQOWyOaWhrgCt8,9811
5
5
  vectordb_bench/log_util.py,sha256=hOdK0TnrcpYZOrRZoBslievXSW8qtTvLvube43rxbVc,2776
6
6
  vectordb_bench/metric.py,sha256=pj-AxQHyIRHTaJY-wTIkTbC6TqEqMzt3kcEmMWEv71w,2063
7
7
  vectordb_bench/models.py,sha256=5N4-0lJLWpoR6NnzX4ONuH7vyi4nRFFuNS0q9jQ4cgM,11023
8
8
  vectordb_bench/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- vectordb_bench/backend/assembler.py,sha256=CkXn40A18x2nOWbL1F8DrrSz--TcYAlwxhqD0VQMCbA,2040
9
+ vectordb_bench/backend/assembler.py,sha256=6GInRT7yBgfTaIPmo-XMkYX4pA8PJQmjMQInynwaunE,2047
10
10
  vectordb_bench/backend/cases.py,sha256=obDdY6g3p9Z2fog7qDwLLDuRMwo3LGQKMHsP66QZd2M,16296
11
- vectordb_bench/backend/data_source.py,sha256=VGrQ6P1a4Yt5RT5VkQe3YhW8vb8Lre0a7uARwYnwuAA,5680
11
+ vectordb_bench/backend/data_source.py,sha256=bfa_Zg4O9fRP2ENmVZ_2-NISKozoFN-TocyxOlw1JtE,5524
12
12
  vectordb_bench/backend/dataset.py,sha256=V4OKPt23v0kmdvgJwDr_R2fLJv3lXLZEii992cEM2Q0,8993
13
13
  vectordb_bench/backend/result_collector.py,sha256=mpROVdZ-HChKBVyMV5TZ5v7YGRb69bvfT7Gezn5F5sY,819
14
- vectordb_bench/backend/task_runner.py,sha256=VIyyMG2NCyzK_aJvZX5UxtW_YcEulC9EbHp5pmsCRuk,12244
14
+ vectordb_bench/backend/task_runner.py,sha256=vlaXB0_25-G9w1Lj-F0SrvJzhXT7ceDWGIb2aKRXukU,11488
15
15
  vectordb_bench/backend/utils.py,sha256=R6THuJdZhiQYSSJTqv0Uegl2B20taV_QjwvFrun2yxE,1949
16
- vectordb_bench/backend/clients/__init__.py,sha256=GWBOsRsYULH4V9aA078iB0cHAMkODoWPiYlrZYG2s5o,7998
17
- vectordb_bench/backend/clients/api.py,sha256=74yDDCFNJegNZSmWTNtLc9moPpGLa0vVVJfFuVgDWUY,6229
16
+ vectordb_bench/backend/clients/__init__.py,sha256=YvXoI8CS69WOTLl5n858xkRReeqiiRFBETZAAnD14qg,8212
17
+ vectordb_bench/backend/clients/api.py,sha256=uQaX_FiMFlD3z_91awUzB-qtBkvyDsMKE8ks5bBgJSY,6233
18
18
  vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py,sha256=7yPYaWoHeHNxDMtpReGXsdEPFD1e4vQblFor7TmLq5o,770
19
19
  vectordb_bench/backend/clients/aliyun_elasticsearch/config.py,sha256=d9RCgfCgauKvy6z9ig_wBormgwiGtkh8POyoHloHnJA,505
20
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py,sha256=GI8v59ntuy6f8C2wxz_EoCTcWvgxmgcCN7WBu7caL64,13385
20
+ vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py,sha256=rwa4rtbbP2Kaczh7Bf0bc_lE_sGG5w9PhtfdFu7rQNs,13237
21
21
  vectordb_bench/backend/clients/aliyun_opensearch/config.py,sha256=KSiuRu-p7oL2PEukfD6SvYCKg1jTVvro9lMcUnQSN6I,1214
22
- vectordb_bench/backend/clients/alloydb/alloydb.py,sha256=ehMwguVtzxaRAgyoY-bS7TNPD87M4rqExZgbaRPkB_g,13022
22
+ vectordb_bench/backend/clients/alloydb/alloydb.py,sha256=E24hxCUgpBCRiScdcS_iBk8n0wngUgVg8qujOWiUhw0,13009
23
23
  vectordb_bench/backend/clients/alloydb/cli.py,sha256=G6Q0WApoDXDG_pqmK2lEKFIvKB8qAsZFPM8TfsURydE,5086
24
24
  vectordb_bench/backend/clients/alloydb/config.py,sha256=PJs2wIJqwcG6UJ3T8R7Pi3xTMBfxTZiNkcWyhtHv5dc,5313
25
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py,sha256=dLRuK-LN247CTW3pQ7qKrUzH3VFlZjoAZCGGhs3hC4k,7611
25
+ vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py,sha256=PAFP5W6k0GxbpvvIFs8u3pMi7x-sLw9cm4mUsM2agws,7541
26
26
  vectordb_bench/backend/clients/aws_opensearch/cli.py,sha256=845dUYcD5m9j9FNOCXAspgCkvOKWWJXt2k87I55Odbs,1301
27
27
  vectordb_bench/backend/clients/aws_opensearch/config.py,sha256=XsQBKGBPR0lqH3XrQpijwgt9Tfb6KBVg35x2L9LQQMw,1881
28
28
  vectordb_bench/backend/clients/aws_opensearch/run.py,sha256=Ry5aAlielWjq0hx7LnbdShfOwzZhz3Gq9WYu5U43x9s,5001
29
- vectordb_bench/backend/clients/chroma/chroma.py,sha256=_JpLaCu5SXJMSJzuR2UZheP1AGfzBkMYKAH_WxaU8Rs,3638
29
+ vectordb_bench/backend/clients/chroma/chroma.py,sha256=TGsmAnG5I3bbIjJ5L7ktke6fD8lOrx56Wt2tMCb3dY8,3609
30
30
  vectordb_bench/backend/clients/chroma/config.py,sha256=8nXpPdecQ5HrNqcsQwAVgacSz6uLgI-BI7v4tB8CeDk,347
31
31
  vectordb_bench/backend/clients/elastic_cloud/config.py,sha256=_5Cz3__CbMU7zCizkhK1pGhH3TLJacn8efVueUZ0lnQ,1573
32
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py,sha256=51yiwuW9eRH4t44we0qGF3YvhfEmKB8-T78ueUPsh7E,5513
33
- vectordb_bench/backend/clients/memorydb/cli.py,sha256=AIjkKTu7L4UFGbrtUeqx7mFC5_-6F1jjzuhQy0oYHa0,2661
32
+ vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py,sha256=juWDlWt-eDd9WZEw35Q4WKvfW1pmNaFXdWjK4UveyyA,5443
33
+ vectordb_bench/backend/clients/memorydb/cli.py,sha256=mUpBN0VoE6M55AAEwyd20uEtPkOpckJzmcP2XXpue30,2659
34
34
  vectordb_bench/backend/clients/memorydb/config.py,sha256=D2Q-HkDwnmz98ek1e_iNu4o9CIRB14pOQWSZgRvd6oY,1500
35
- vectordb_bench/backend/clients/memorydb/memorydb.py,sha256=dX_5Rm3t_B5M439QA8aOgUW0dNn4OoaigHpTT_Ex9fY,10248
35
+ vectordb_bench/backend/clients/memorydb/memorydb.py,sha256=WrZhDYJqpwN173sk2lmPnOibHcQCPrq_PEAMFcL62U4,10219
36
36
  vectordb_bench/backend/clients/milvus/cli.py,sha256=xGvYYKOAs32vz78oB5Ks_xnWIMzcl_f7TPEPRk94FeQ,8895
37
37
  vectordb_bench/backend/clients/milvus/config.py,sha256=oFZ5VG5UHws161M1cYmMr2b9NSEoqwwst998T59QGQo,7520
38
- vectordb_bench/backend/clients/milvus/milvus.py,sha256=yhIvmUPVf16BZSQ-y7xTxplbXjJrxO7T5HR4bMP0Jdo,7509
38
+ vectordb_bench/backend/clients/milvus/milvus.py,sha256=xdVVjMnBzD5KGJ7iUB-B3SuTL4JDW1UD15QBevExMLw,6862
39
39
  vectordb_bench/backend/clients/pgdiskann/cli.py,sha256=o5ddAp1Be2TOnm8Wh9IyIWUxdnw5N6v92Ms1s6CEwBo,3135
40
40
  vectordb_bench/backend/clients/pgdiskann/config.py,sha256=DBsVgLn4edl-irSlP_GV7KW-8jFemns_ujR_CuVnQtE,4412
41
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py,sha256=VNI2rdtkz0YS94YyakBix9wterHDCDI8rU0AVLW0SuI,12312
41
+ vectordb_bench/backend/clients/pgdiskann/pgdiskann.py,sha256=Z8K74Y6uMi6q8gnnD68doBxc5pWBSpRnNLDhlifseH4,12299
42
42
  vectordb_bench/backend/clients/pgvecto_rs/cli.py,sha256=n0cMbUrGS2jzCpusVExxRDJb3iUzWblkeNmuRzLPmoE,4686
43
43
  vectordb_bench/backend/clients/pgvecto_rs/config.py,sha256=jWs3078s5chH37O94zSHoQ98ptLTYiJeHiLy6BQgTE4,4725
44
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py,sha256=eABNe74U0p5Sauk1p2Ej6xbeEftP42j56TjybBKt80I,9925
44
+ vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py,sha256=ZSOPpQjLtWxpQz7-R24X-e2oVLHJsZeEmaOzfd5pELA,9828
45
45
  vectordb_bench/backend/clients/pgvector/cli.py,sha256=aeuYMRIVWgR_rRkuEa_hU6_e0J5y43pxiprInQ_OrGg,6229
46
46
  vectordb_bench/backend/clients/pgvector/config.py,sha256=llLdn5y9NtK24tzT9bqbJmIaDYxkg_BqPbedoW5nfH0,8924
47
- vectordb_bench/backend/clients/pgvector/pgvector.py,sha256=pZAlzdfhJzP9KYqeGlWoPW_8idR-kaiwRI5P9OLRJcA,18631
47
+ vectordb_bench/backend/clients/pgvector/pgvector.py,sha256=-KNqr57bmeUeeNO72IN6nzop79Pp1Emn-MmqvdE1rNk,18587
48
48
  vectordb_bench/backend/clients/pgvectorscale/cli.py,sha256=3XL2NdBXh9ug8SyUwPD6fGXkjYflahew5GO2xIza43g,3403
49
49
  vectordb_bench/backend/clients/pgvectorscale/config.py,sha256=ZMcRQPyCMzMJLXw56zODUGJmqOP-sOMA1entNsfE-Ck,3122
50
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py,sha256=Ajo339JWpWH6enCMcb0rl76msPXWXf-uoDXRHxdtbfs,10180
50
+ vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py,sha256=NONFdcE-b-mt6GsRTru6UbMMu8iqX8PfRF43fY_AODw,10136
51
51
  vectordb_bench/backend/clients/pinecone/config.py,sha256=hzPX1lxDpYI9IdpNs7RYB1vAn2uMlCw9NH4FonQEmfQ,294
52
- vectordb_bench/backend/clients/pinecone/pinecone.py,sha256=Ko9eFODiBIyOrVYCq-3xBCUtx1uuWsOUvOTypYuCs0g,3599
52
+ vectordb_bench/backend/clients/pinecone/pinecone.py,sha256=9uxAp3KgofaFMoosnbqwP4CUkbn4kNplhtho-IDEqB4,3586
53
53
  vectordb_bench/backend/clients/qdrant_cloud/config.py,sha256=81eRgFNJJy7oe-FreNv7RzhAfyd-_19REbVcW-hJTt4,1348
54
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py,sha256=n50L2-yyf6iQLnG27wSD7Ww3R2MZ2qFZv8OL9b4WeYU,5441
54
+ vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py,sha256=fh-FP6661Siz6oynL1K5Tf-Y_tTw4bVs02RHCBMyhmY,5426
55
55
  vectordb_bench/backend/clients/redis/cli.py,sha256=tFLXzNyvh_GYUZihqMvj65C5vBKPVVAYIXtbzGaVCcU,2167
56
56
  vectordb_bench/backend/clients/redis/config.py,sha256=xVSVC6xjjAKsiwJuJoLguCGhiiUT9w13Db_Up5ZqljY,1241
57
- vectordb_bench/backend/clients/redis/redis.py,sha256=QYbULIehrZ-6SIs20vTCS1wMcd3REKcgXkwlXo88rcY,6798
58
- vectordb_bench/backend/clients/test/cli.py,sha256=ro7RRQ63horo68Xgn0g7IPxH40I7o7D8je0Mf42KV20,548
57
+ vectordb_bench/backend/clients/redis/redis.py,sha256=39-JfyMQp584jLN5ltCKqyB-sNwC18VICd6Z1XpJNMg,6769
58
+ vectordb_bench/backend/clients/test/cli.py,sha256=NqvX7Rl6iEzAcvdy4VXOier-bOp0N3yVQ84rQOKjZEo,543
59
59
  vectordb_bench/backend/clients/test/config.py,sha256=_Eufl8g9EYBUlUw-6vNf4b4FK2KM2u9a41cz7n08QI8,390
60
- vectordb_bench/backend/clients/test/test.py,sha256=JESJ7GFBzlxTtQI-2RJobJTJ5Jmxm7CzxstfhnxGWSo,1399
60
+ vectordb_bench/backend/clients/test/test.py,sha256=p8ZJ9PPQOPMc3fgtZpMMw3LROOk3VGWY-1j81NkCi8Q,1363
61
61
  vectordb_bench/backend/clients/weaviate_cloud/cli.py,sha256=Cy9epFJgeImVa3STogZhEyFAePjCZ7LY_iDu8nRpiME,1047
62
62
  vectordb_bench/backend/clients/weaviate_cloud/config.py,sha256=kLSxWFtEr12WCF610SBGWyVRzXbgnO0PsftNPSIiBMM,1245
63
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py,sha256=1OLS5A_ZszFeBxPOSkLHM7xwD6yxy9nd6waEqVQ46yg,5241
63
+ vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py,sha256=OZifz7ZzUzMFqdx2QUgI9QG9bMrYjNo6cFFyzGe4-LU,5190
64
64
  vectordb_bench/backend/clients/zilliz_cloud/cli.py,sha256=3_eD3ZG-FeTw1cenhbBFniPnVLgT_UQwdIuGmGDroJw,1551
65
65
  vectordb_bench/backend/clients/zilliz_cloud/config.py,sha256=-Qb50m-Hcz86OcMURU21n61Rz-RpFqKfUsmjna85OR8,909
66
66
  vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py,sha256=B9EUDmK11oQ2GIslVkbRVAitHT-NbRGxQD_Weia-vhY,681
67
67
  vectordb_bench/backend/runner/__init__.py,sha256=mF8YnErTa7MVG37zZb0KFXBSrmMw_afttuiqWcwrVls,228
68
- vectordb_bench/backend/runner/mp_runner.py,sha256=22jySJFd7u1Ee1QfpA8N_pltZvx9MChwEuKphbisS-c,10118
69
- vectordb_bench/backend/runner/rate_runner.py,sha256=GluVVBl8a-LrSlaVvs4UjiHgaxL3UQ-IvjAxOtANnO4,4310
70
- vectordb_bench/backend/runner/read_write_runner.py,sha256=ItnajqPbpv82IfF8pZVk4NJi5l28rN8tbem6R2v1R2I,7866
71
- vectordb_bench/backend/runner/serial_runner.py,sha256=hey6X7HweiUIgdu5er_8Oqxvhq7frezc50ikIAzwoxc,10281
68
+ vectordb_bench/backend/runner/mp_runner.py,sha256=AJHrQmUADDWDQZ0eZ4aaAH9HOQtZHiafXJYGU5PNq3Y,9645
69
+ vectordb_bench/backend/runner/rate_runner.py,sha256=2coO7qalEh6ZbVKUkyFvip4JWjs1yJM-iiExSrjEp9c,4306
70
+ vectordb_bench/backend/runner/read_write_runner.py,sha256=CXYBXEEkS1S7-NurdzN5Wh6N0Vx-rprM9Qehk1WKwl8,7822
71
+ vectordb_bench/backend/runner/serial_runner.py,sha256=URymqOy-9NdVE3kXWBW3e9R0Oh2sG9zF1vJkV7sk48E,10092
72
72
  vectordb_bench/backend/runner/util.py,sha256=tjTFUxth6hNnVrlU82TqkHhfeZo4ymj7WlyK4zFyPTg,522
73
73
  vectordb_bench/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
74
  vectordb_bench/cli/cli.py,sha256=-BXRfiWzW6KjBF7d-6Lw7RexPktERm1pcwJqgetSX0c,15275
@@ -127,9 +127,9 @@ vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json
127
127
  vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json,sha256=-Mdm4By65XDRCrmVOCF8yQXjcZtH4Xo4shcjoDoBUKU,18293
128
128
  vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json,sha256=77XlHT5zM_K7mG5HfDQKwXZnSCuR37VUbt6-P3J_amI,15737
129
129
  vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json,sha256=TualfJ0664Hs-vdIW68bdkqAEYyzotXmu2P0yIN-GHk,42526
130
- vectordb_bench-0.0.20.dist-info/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
131
- vectordb_bench-0.0.20.dist-info/METADATA,sha256=XlLpjF9OvIO2y9xO_kpH1_kMbAUSdGvUNTYtPTjHW_8,34577
132
- vectordb_bench-0.0.20.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
133
- vectordb_bench-0.0.20.dist-info/entry_points.txt,sha256=Qzw6gVx96ui8esG21H6yHsI6nboEohRmV424TYhQNrA,113
134
- vectordb_bench-0.0.20.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
135
- vectordb_bench-0.0.20.dist-info/RECORD,,
130
+ vectordb_bench-0.0.21.dist-info/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
131
+ vectordb_bench-0.0.21.dist-info/METADATA,sha256=SDCFG-7cwQRoLe3mrvjCQNslK1Ju8aw0VQ_Kc8408hw,34577
132
+ vectordb_bench-0.0.21.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
133
+ vectordb_bench-0.0.21.dist-info/entry_points.txt,sha256=Qzw6gVx96ui8esG21H6yHsI6nboEohRmV424TYhQNrA,113
134
+ vectordb_bench-0.0.21.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
135
+ vectordb_bench-0.0.21.dist-info/RECORD,,