vectordb-bench 0.0.21__py3-none-any.whl → 0.0.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vectordb_bench/backend/clients/__init__.py CHANGED
@@ -40,6 +40,7 @@ class DB(Enum):
     AliyunElasticsearch = "AliyunElasticsearch"
     Test = "test"
     AliyunOpenSearch = "AliyunOpenSearch"
+    MongoDB = "MongoDB"
 
     @property
     def init_cls(self) -> type[VectorDB]:  # noqa: PLR0911, PLR0912, C901
@@ -129,6 +130,11 @@ class DB(Enum):
 
             return AliyunOpenSearch
 
+        if self == DB.MongoDB:
+            from .mongodb.mongodb import MongoDB
+
+            return MongoDB
+
         if self == DB.Test:
             from .test.test import Test
 
@@ -225,6 +231,11 @@ class DB(Enum):
 
             return AliyunOpenSearchConfig
 
+        if self == DB.MongoDB:
+            from .mongodb.config import MongoDBConfig
+
+            return MongoDBConfig
+
         if self == DB.Test:
             from .test.config import TestConfig
 
@@ -302,6 +313,11 @@ class DB(Enum):
 
             return AliyunOpenSearchIndexConfig
 
+        if self == DB.MongoDB:
+            from .mongodb.config import MongoDBIndexConfig
+
+            return MongoDBIndexConfig
+
         # DB.Pinecone, DB.Chroma, DB.Redis
         return EmptyDBCaseConfig
 
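The enum wiring above follows the same lazy-import pattern as every other backend: the client module is only imported when the corresponding member is dereferenced. A minimal sketch of how a caller exercises the new member — `init_cls` is shown in the hunk; nothing beyond it is assumed:

```python
# Sketch: dereferencing the new enum member triggers the lazy import above.
from vectordb_bench.backend.clients import DB

db = DB.MongoDB
client_cls = db.init_cls  # imports .mongodb.mongodb.MongoDB on first access
print(client_cls.__name__)  # -> "MongoDB"
```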
vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py CHANGED
@@ -12,6 +12,7 @@ log = logging.getLogger(__name__)
 
 WAITING_FOR_REFRESH_SEC = 30
 WAITING_FOR_FORCE_MERGE_SEC = 30
+SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC = 30
 
 
 class AWSOpenSearch(VectorDB):
@@ -52,10 +53,27 @@ class AWSOpenSearch(VectorDB):
         return AWSOpenSearchIndexConfig
 
     def _create_index(self, client: OpenSearch):
+        cluster_settings_body = {
+            "persistent": {
+                "knn.algo_param.index_thread_qty": self.case_config.index_thread_qty,
+                "knn.memory.circuit_breaker.limit": self.case_config.cb_threshold,
+            }
+        }
+        client.cluster.put_settings(cluster_settings_body)
         settings = {
             "index": {
                 "knn": True,
+                "number_of_shards": self.case_config.number_of_shards,
+                "number_of_replicas": 0,
+                # Flush the translog once it exceeds this size (default: 5 GB)
+                "translog.flush_threshold_size": self.case_config.flush_threshold_size,
+                **(
+                    {"knn.algo_param.ef_search": self.case_config.ef_search}
+                    if self.case_config.engine == AWSOS_Engine.nmslib
+                    else {}
+                ),
             },
+            "refresh_interval": self.case_config.refresh_interval,
         }
         mappings = {
             "properties": {
@@ -145,9 +163,9 @@ class AWSOpenSearch(VectorDB):
                 docvalue_fields=[self.id_col_name],
                 stored_fields="_none_",
             )
-            log.info(f"Search took: {resp['took']}")
-            log.info(f"Search shards: {resp['_shards']}")
-            log.info(f"Search hits total: {resp['hits']['total']}")
+            log.debug(f"Search took: {resp['took']}")
+            log.debug(f"Search shards: {resp['_shards']}")
+            log.debug(f"Search hits total: {resp['hits']['total']}")
             return [int(h["fields"][self.id_col_name][0]) for h in resp["hits"]["hits"]]
         except Exception as e:
             log.warning(f"Failed to search: {self.index_name} error: {e!s}")
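`_create_index` now pushes two persistent k-NN cluster settings before creating the index. For readers who want to reproduce the calls outside the benchmark harness, here is a hedged, standalone sketch using opensearch-py directly; the host and the literal values are placeholders, not necessarily the package's defaults:

```python
# Standalone sketch of the settings applied in _create_index above.
from opensearchpy import OpenSearch

client = OpenSearch(hosts=[{"host": "localhost", "port": 9200}])
client.cluster.put_settings({
    "persistent": {
        "knn.algo_param.index_thread_qty": 4,       # native-engine indexing threads
        "knn.memory.circuit_breaker.limit": "50%",  # k-NN memory cap
    }
})
client.indices.create(index="example", body={
    "settings": {
        "index": {"knn": True, "number_of_shards": 1, "number_of_replicas": 0},
        "refresh_interval": "60s",
    },
})
```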
@@ -157,12 +175,37 @@ class AWSOpenSearch(VectorDB):
         """optimize will be called between insertion and search in performance cases."""
         # Call refresh first to ensure that all segments are created
         self._refresh_index()
-        self._do_force_merge()
+        if self.case_config.force_merge_enabled:
+            self._do_force_merge()
+            self._refresh_index()
+        self._update_replicas()
         # Call refresh again to ensure that the index is ready after force merge.
         self._refresh_index()
         # ensure that all graphs are loaded in memory and ready for search
         self._load_graphs_to_memory()
 
+    def _update_replicas(self):
+        index_settings = self.client.indices.get_settings(index=self.index_name)
+        current_number_of_replicas = int(index_settings[self.index_name]["settings"]["index"]["number_of_replicas"])
+        log.info(
+            f"Current number of replicas is {current_number_of_replicas};"
+            f" changing the replica count to {self.case_config.number_of_replicas}"
+        )
+        settings_body = {"index": {"number_of_replicas": self.case_config.number_of_replicas}}
+        self.client.indices.put_settings(index=self.index_name, body=settings_body)
+        self._wait_till_green()
+
+    def _wait_till_green(self):
+        log.info("Waiting for index to become green...")
+        while True:
+            res = self.client.cat.indices(index=self.index_name, h="health", format="json")
+            health = res[0]["health"]
+            # Stop polling once the index reports green
+            if health == "green":
+                break
+            log.info(f"The index {self.index_name} has health: {health} and is not green. Retrying")
+            time.sleep(SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC)
+        log.info(f"Index {self.index_name} is green")
+
     def _refresh_index(self):
         log.debug(f"Starting refresh for index {self.index_name}")
         while True:
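A note on `_wait_till_green`: the same wait can also be expressed through the cluster health API's built-in blocking parameter. This is not what the package does; it is offered only as a possible simplification, assuming the same `client` and `index_name` as above:

```python
# Possible alternative to the manual polling loop (not used by the package):
# cluster.health can block server-side until the index reaches a status.
client.cluster.health(
    index=index_name,
    wait_for_status="green",
    timeout="300s",  # give up waiting after five minutes
)
```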
@@ -179,6 +222,12 @@ class AWSOpenSearch(VectorDB):
         log.debug(f"Completed refresh for index {self.index_name}")
 
     def _do_force_merge(self):
+        log.info(f"Updating the index thread qty to {self.case_config.index_thread_qty_during_force_merge}.")
+
+        cluster_settings_body = {
+            "persistent": {"knn.algo_param.index_thread_qty": self.case_config.index_thread_qty_during_force_merge}
+        }
+        self.client.cluster.put_settings(cluster_settings_body)
         log.debug(f"Starting force merge for index {self.index_name}")
         force_merge_endpoint = f"/{self.index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false"
         force_merge_task_id = self.client.transport.perform_request("POST", force_merge_endpoint)["task"]
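Because the force merge is submitted with `wait_for_completion=false`, the caller gets back a task id. The diff does not show the polling side; a sketch of how such a task id is typically tracked with the standard tasks API, assuming the same `client` as above, looks like this:

```python
# Hedged sketch: track the async force-merge task returned above.
import time

endpoint = f"/{index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false"
task_id = client.transport.perform_request("POST", endpoint)["task"]
while not client.tasks.get(task_id=task_id)["completed"]:
    time.sleep(WAITING_FOR_FORCE_MERGE_SEC)  # module constant, 30s
```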
vectordb_bench/backend/clients/aws_opensearch/cli.py CHANGED
@@ -18,6 +18,79 @@ class AWSOpenSearchTypedDict(TypedDict):
     port: Annotated[int, click.option("--port", type=int, default=443, help="Db Port")]
     user: Annotated[str, click.option("--user", type=str, default="admin", help="Db User")]
     password: Annotated[str, click.option("--password", type=str, help="Db password")]
+    number_of_shards: Annotated[
+        int,
+        click.option("--number-of-shards", type=int, help="Number of primary shards for the index", default=1),
+    ]
+    number_of_replicas: Annotated[
+        int,
+        click.option(
+            "--number-of-replicas", type=int, help="Number of replica copies for each primary shard", default=1
+        ),
+    ]
+    index_thread_qty: Annotated[
+        int,
+        click.option(
+            "--index-thread-qty",
+            type=int,
+            help="Thread count for native engine indexing",
+            default=4,
+        ),
+    ]
+
+    index_thread_qty_during_force_merge: Annotated[
+        int,
+        click.option(
+            "--index-thread-qty-during-force-merge",
+            type=int,
+            help="Thread count during force merge operations",
+            default=4,
+        ),
+    ]
+
+    number_of_indexing_clients: Annotated[
+        int,
+        click.option(
+            "--number-of-indexing-clients",
+            type=int,
+            help="Number of concurrent indexing clients",
+            default=1,
+        ),
+    ]
+
+    number_of_segments: Annotated[
+        int,
+        click.option("--number-of-segments", type=int, help="Target number of segments after merging", default=1),
+    ]
+
+    refresh_interval: Annotated[
+        str,
+        click.option(
+            "--refresh-interval", type=str, help="How often to make new data available for search", default="60s"
+        ),
+    ]
+
+    force_merge_enabled: Annotated[
+        bool,
+        click.option("--force-merge-enabled", type=bool, help="Whether to perform force merge operation", default=True),
+    ]
+
+    flush_threshold_size: Annotated[
+        str,
+        click.option(
+            "--flush-threshold-size", type=str, help="Size threshold for flushing the transaction log", default="5120mb"
+        ),
+    ]
+
+    cb_threshold: Annotated[
+        str,
+        click.option(
+            "--cb-threshold",
+            type=str,
+            help="k-NN memory circuit breaker threshold",
+            default="50%",
+        ),
+    ]
 
 
 class AWSOpenSearchHNSWTypedDict(CommonTypedDict, AWSOpenSearchTypedDict, HNSWFlavor2): ...
@@ -36,6 +109,17 @@ def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
             user=parameters["user"],
             password=SecretStr(parameters["password"]),
         ),
-        db_case_config=AWSOpenSearchIndexConfig(),
+        db_case_config=AWSOpenSearchIndexConfig(
+            number_of_shards=parameters["number_of_shards"],
+            number_of_replicas=parameters["number_of_replicas"],
+            index_thread_qty=parameters["index_thread_qty"],
+            number_of_segments=parameters["number_of_segments"],
+            refresh_interval=parameters["refresh_interval"],
+            force_merge_enabled=parameters["force_merge_enabled"],
+            flush_threshold_size=parameters["flush_threshold_size"],
+            number_of_indexing_clients=parameters["number_of_indexing_clients"],
+            index_thread_qty_during_force_merge=parameters["index_thread_qty_during_force_merge"],
+            cb_threshold=parameters["cb_threshold"],
+        ),
         **parameters,
     )
vectordb_bench/backend/clients/aws_opensearch/config.py CHANGED
@@ -39,6 +39,16 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
     efConstruction: int = 256
     efSearch: int = 256
     M: int = 16
+    index_thread_qty: int | None = 4
+    number_of_shards: int | None = 1
+    number_of_replicas: int | None = 0
+    number_of_segments: int | None = 1
+    refresh_interval: str | None = "60s"
+    force_merge_enabled: bool | None = True
+    flush_threshold_size: str | None = "5120mb"
+    number_of_indexing_clients: int | None = 1
+    index_thread_qty_during_force_merge: int
+    cb_threshold: str | None = "50%"
 
     def parse_metric(self) -> str:
         if self.metric_type == MetricType.IP:
vectordb_bench/backend/clients/mongodb/config.py ADDED
@@ -0,0 +1,53 @@
+from enum import Enum
+
+from pydantic import BaseModel, SecretStr
+
+from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
+
+
+class QuantizationType(Enum):
+    NONE = "none"
+    BINARY = "binary"
+    SCALAR = "scalar"
+
+
+class MongoDBConfig(DBConfig, BaseModel):
+    connection_string: SecretStr = "mongodb+srv://<user>:<password>@<cluster_name>.heatl.mongodb.net"
+    database: str = "vdb_bench"
+
+    def to_dict(self) -> dict:
+        return {
+            "connection_string": self.connection_string.get_secret_value(),
+            "database": self.database,
+        }
+
+
+class MongoDBIndexConfig(BaseModel, DBCaseConfig):
+    index: IndexType = IndexType.HNSW  # MongoDB uses HNSW for vector search
+    metric_type: MetricType = MetricType.COSINE
+    num_candidates_ratio: int = 10  # Default numCandidates ratio for vector search
+    quantization: QuantizationType = QuantizationType.NONE  # Quantization type if applicable
+
+    def parse_metric(self) -> str:
+        if self.metric_type == MetricType.L2:
+            return "euclidean"
+        if self.metric_type == MetricType.IP:
+            return "dotProduct"
+        return "cosine"  # Default to cosine similarity
+
+    def index_param(self) -> dict:
+        return {
+            "type": "vectorSearch",
+            "fields": [
+                {
+                    "type": "vector",
+                    "similarity": self.parse_metric(),
+                    "numDimensions": None,  # Will be set in MongoDB class
+                    "path": "vector",  # Vector field name
+                    "quantization": self.quantization.value,
+                }
+            ],
+        }
+
+    def search_param(self) -> dict:
+        return {"num_candidates_ratio": self.num_candidates_ratio}
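To make the shape of `index_param()` concrete, this is what the definition expands to for a hypothetical 768-dimensional collection with the defaults above; the client fills in `numDimensions` later:

```python
# Illustrative expansion of index_param() with default settings.
cfg = MongoDBIndexConfig()
definition = cfg.index_param()
definition["fields"][0]["numDimensions"] = 768  # done by the MongoDB client
# definition == {
#     "type": "vectorSearch",
#     "fields": [{"type": "vector", "similarity": "cosine",
#                 "numDimensions": 768, "path": "vector",
#                 "quantization": "none"}],
# }
```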
vectordb_bench/backend/clients/mongodb/mongodb.py ADDED
@@ -0,0 +1,200 @@
+import logging
+import time
+from contextlib import contextmanager
+
+from pymongo import MongoClient
+from pymongo.operations import SearchIndexModel
+
+from ..api import VectorDB
+from .config import MongoDBIndexConfig
+
+log = logging.getLogger(__name__)
+
+
+class MongoDBError(Exception):
+    """Custom exception class for MongoDB client errors."""
+
+
+class MongoDB(VectorDB):
+    def __init__(
+        self,
+        dim: int,
+        db_config: dict,
+        db_case_config: MongoDBIndexConfig,
+        collection_name: str = "vdb_bench_collection",
+        id_field: str = "id",
+        vector_field: str = "vector",
+        drop_old: bool = False,
+        **kwargs,
+    ):
+        self.dim = dim
+        self.db_config = db_config
+        self.case_config = db_case_config
+        self.collection_name = collection_name
+        self.id_field = id_field
+        self.vector_field = vector_field
+        self.drop_old = drop_old
+
+        # Update index dimensions
+        index_params = self.case_config.index_param()
+        log.info(f"index params: {index_params}")
+        index_params["fields"][0]["numDimensions"] = dim
+        self.index_params = index_params
+
+        # Initialize - they'll also be set in init()
+        uri = self.db_config["connection_string"]
+        self.client = MongoClient(uri)
+        self.db = self.client[self.db_config["database"]]
+        self.collection = self.db[self.collection_name]
+        if self.drop_old and self.collection_name in self.db.list_collection_names():
+            log.info(f"MongoDB client dropping old collection: {self.collection_name}")
+            self.db.drop_collection(self.collection_name)
+        self.client = None
+        self.db = None
+        self.collection = None
+
+    @contextmanager
+    def init(self):
+        """Initialize MongoDB client and cleanup when done"""
+        try:
+            uri = self.db_config["connection_string"]
+            self.client = MongoClient(uri)
+            self.db = self.client[self.db_config["database"]]
+            self.collection = self.db[self.collection_name]
+
+            yield
+        finally:
+            if self.client is not None:
+                self.client.close()
+                self.client = None
+                self.db = None
+                self.collection = None
+
+    def _create_index(self) -> None:
+        """Create vector search index"""
+        index_name = "vector_index"
+        index_params = self.index_params
+        log.info(f"index params {index_params}")
+        # drop index if it already exists
+        if self.collection.list_indexes():
+            all_indexes = self.collection.list_search_indexes()
+            if any(idx.get("name") == index_name for idx in all_indexes):
+                log.info(f"Drop index: {index_name}")
+                try:
+                    self.collection.drop_search_index(index_name)
+                    while True:
+                        indices = list(self.collection.list_search_indexes())
+                        indices = [idx for idx in indices if idx["name"] == index_name]
+                        log.debug(f"index status {indices}")
+                        if len(indices) == 0:
+                            break
+                        log.info(f"index deleting {indices}")
+                        time.sleep(5)  # avoid busy-polling while the index is deleted
+                except Exception:
+                    log.exception(f"Error dropping index {index_name}")
+        try:
+            # Create vector search index
+            search_index = SearchIndexModel(definition=index_params, name=index_name, type="vectorSearch")
+
+            self.collection.create_search_index(search_index)
+            log.info(f"Created vector search index: {index_name}")
+            self._wait_for_index_ready(index_name)
+
+            # Create regular index on id field for faster lookups
+            self.collection.create_index(self.id_field)
+            log.info(f"Created index on {self.id_field} field")
+
+        except Exception:
+            log.exception(f"Error creating index {index_name}")
+            raise
+
+    def _wait_for_index_ready(self, index_name: str, check_interval: int = 5) -> None:
+        """Wait for index to be ready"""
+        while True:
+            indices = list(self.collection.list_search_indexes())
+            log.debug(f"index status {indices}")
+            if indices and any(idx.get("name") == index_name and idx.get("queryable") for idx in indices):
+                break
+            for idx in indices:
+                if idx.get("name") == index_name and idx.get("status") == "FAILED":
+                    error_msg = f"Index {index_name} failed to build"
+                    raise MongoDBError(error_msg)
+
+            time.sleep(check_interval)
+        log.info(f"Index {index_name} is ready")
+
+    def need_normalize_cosine(self) -> bool:
+        return False
+
+    def insert_embeddings(
+        self,
+        embeddings: list[list[float]],
+        metadata: list[int],
+        **kwargs,
+    ) -> (int, Exception | None):
+        """Insert embeddings into MongoDB"""
+
+        # Prepare documents in bulk
+        documents = [
+            {
+                self.id_field: id_,
+                self.vector_field: embedding,
+            }
+            for id_, embedding in zip(metadata, embeddings, strict=False)
+        ]
+
+        # Use ordered=False for better insert performance
+        try:
+            self.collection.insert_many(documents, ordered=False)
+        except Exception as e:
+            return 0, e
+        return len(documents), None
+
+    def search_embedding(
+        self,
+        query: list[float],
+        k: int = 100,
+        filters: dict | None = None,
+        **kwargs,
+    ) -> list[int]:
+        """Search for similar vectors"""
+        search_params = self.case_config.search_param()
+
+        vector_search = {"queryVector": query, "index": "vector_index", "path": self.vector_field, "limit": k}
+
+        # Use exact (ENN) search if requested; search_param() does not always
+        # supply the key, so default to approximate search
+        if search_params.get("exact", False):
+            vector_search["exact"] = True
+        else:
+            # Set numCandidates based on k value and data size
+            # For 50K dataset, use higher multiplier for better recall
+            num_candidates = min(10000, k * search_params["num_candidates_ratio"])
+            vector_search["numCandidates"] = num_candidates
+
+        # Add filter if specified ($gte is the MQL operator Atlas expects)
+        if filters:
+            log.info(f"Applying filter: {filters}")
+            vector_search["filter"] = {
+                "id": {"$gte": filters["id"]},
+            }
+        pipeline = [
+            {"$vectorSearch": vector_search},
+            {
+                "$project": {
+                    "_id": 0,
+                    self.id_field: 1,
+                    "score": {"$meta": "vectorSearchScore"},  # Include similarity score
+                }
+            },
+        ]
+
+        results = list(self.collection.aggregate(pipeline))
+        return [doc[self.id_field] for doc in results]
+
+    def optimize(self, data_size: int | None = None) -> None:
+        """Build the vector search index after load; Atlas maintains it afterwards"""
+        log.info("optimize for search")
+        self._create_index()
+        self._wait_for_index_ready("vector_index")
+
+    def ready_to_load(self) -> None:
+        """MongoDB is always ready to load"""
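Putting the new client together, here is a minimal end-to-end sketch under stated assumptions: the URI is a placeholder, an Atlas cluster that supports `$vectorSearch` is required, and a real benchmark goes through the runner instead of calling the client directly:

```python
# Hedged usage sketch of the MongoDB client above (4-dim toy vectors).
db_config = {
    "connection_string": "mongodb+srv://<user>:<password>@<cluster>.mongodb.net",
    "database": "vdb_bench",
}
mongo = MongoDB(dim=4, db_config=db_config,
                db_case_config=MongoDBIndexConfig(), drop_old=True)
with mongo.init():
    inserted, err = mongo.insert_embeddings([[0.1, 0.2, 0.3, 0.4]], [0])
    mongo.optimize()  # builds "vector_index" and waits until queryable
    ids = mongo.search_embedding([0.1, 0.2, 0.3, 0.4], k=1)
```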
vectordb_bench/frontend/components/custom/displaypPrams.py CHANGED
@@ -3,7 +3,7 @@ def displayParams(st):
         """
     - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
       - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
-      - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
+      - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
       - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
 
     - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
@@ -11,3 +11,14 @@ def displayParams(st):
     - `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
     """
     )
+    st.caption(
+        """We recommend limiting the number of test query vectors to around 1,000.""",
+        help="""
+        When conducting concurrent query tests, VectorDBBench creates a large number of processes.
+        To minimize additional communication overhead during testing,
+        we prepare a complete set of test queries for each process, allowing them to run independently.\n
+        However, this means that as the number of concurrent processes increases,
+        the number of copied query vectors also increases significantly,
+        which can place substantial pressure on memory resources.
+        """,
+    )
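A back-of-the-envelope estimate makes the warning concrete (illustrative figures, not measurements):

```python
# Each process holds its own float32 copy of every test query vector.
num_queries, dim, processes = 10_000, 1536, 75
per_process = num_queries * dim * 4          # ~61 MB per process
total_gib = per_process * processes / 2**30  # ~4.3 GiB across 75 processes
```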
vectordb_bench/frontend/components/run_test/submitTask.py CHANGED
@@ -1,6 +1,8 @@
 from datetime import datetime
+from vectordb_bench import config
 from vectordb_bench.frontend.config import styles
 from vectordb_bench.interface import benchmark_runner
+from vectordb_bench.models import TaskConfig
 
 
 def submitTask(st, tasks, isAllValid):
@@ -47,16 +49,31 @@ def advancedSettings(st):
     k = container[0].number_input("k", min_value=1, value=100, label_visibility="collapsed")
     container[1].caption("K value for number of nearest neighbors to search")
 
-    return index_already_exists, use_aliyun, k
+    container = st.columns([1, 2])
+    defaultconcurrentInput = ",".join(map(str, config.NUM_CONCURRENCY))
+    concurrentInput = container[0].text_input(
+        "Concurrent Input", value=defaultconcurrentInput, label_visibility="collapsed"
+    )
+    container[1].caption("Number of concurrent clients for the search tests used to find max QPS")
+    return index_already_exists, use_aliyun, k, concurrentInput
 
 
-def controlPanel(st, tasks, taskLabel, isAllValid):
-    index_already_exists, use_aliyun, k = advancedSettings(st)
+def controlPanel(st, tasks: list[TaskConfig], taskLabel, isAllValid):
+    index_already_exists, use_aliyun, k, concurrentInput = advancedSettings(st)
 
     def runHandler():
         benchmark_runner.set_drop_old(not index_already_exists)
+
+        try:
+            concurrentInput_list = [int(item.strip()) for item in concurrentInput.split(",")]
+        except ValueError:
+            st.write("Please enter a comma-separated list of integers")
+            return None
+
         for task in tasks:
            task.case_config.k = k
+           task.case_config.concurrency_search_config.num_concurrency = concurrentInput_list
+
         benchmark_runner.set_download_address(use_aliyun)
         benchmark_runner.run(tasks, taskLabel)
vectordb_bench/frontend/config/dbCaseConfigs.py CHANGED
@@ -1041,6 +1041,26 @@ CaseConfigParamInput_NumCandidates_AliES = CaseConfigInput(
 )
 
 
+CaseConfigParamInput_MongoDBQuantizationType = CaseConfigInput(
+    label=CaseConfigParamType.mongodb_quantization_type,
+    inputType=InputType.Option,
+    inputConfig={
+        "options": ["none", "scalar", "binary"],
+    },
+)
+
+
+CaseConfigParamInput_MongoDBNumCandidatesRatio = CaseConfigInput(
+    label=CaseConfigParamType.mongodb_num_candidates_ratio,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 10,
+        "max": 20,
+        "value": 10,
+    },
+)
+
+
 MilvusLoadConfig = [
     CaseConfigParamInput_IndexType,
     CaseConfigParamInput_M,
@@ -1224,6 +1244,14 @@ AliyunElasticsearchPerformanceConfig = [
     CaseConfigParamInput_NumCandidates_AliES,
 ]
 
+MongoDBLoadingConfig = [
+    CaseConfigParamInput_MongoDBQuantizationType,
+]
+MongoDBPerformanceConfig = [
+    CaseConfigParamInput_MongoDBQuantizationType,
+    CaseConfigParamInput_MongoDBNumCandidatesRatio,
+]
+
 CASE_CONFIG_MAP = {
     DB.Milvus: {
         CaseLabel.Load: MilvusLoadConfig,
@@ -1272,4 +1300,8 @@ CASE_CONFIG_MAP = {
         CaseLabel.Load: AliyunOpensearchLoadingConfig,
         CaseLabel.Performance: AliyunOpenSearchPerformanceConfig,
     },
+    DB.MongoDB: {
+        CaseLabel.Load: MongoDBLoadingConfig,
+        CaseLabel.Performance: MongoDBPerformanceConfig,
+    },
 }
vectordb_bench/log_util.py CHANGED
@@ -1,8 +1,13 @@
 import logging
 from logging import config
+from pathlib import Path
 
 
 def init(log_level: str):
+    # Create logs directory if it doesn't exist
+    log_dir = Path("logs")
+    log_dir.mkdir(exist_ok=True)
+
     log_config = {
         "version": 1,
         "disable_existing_loggers": False,
@@ -24,15 +29,23 @@ def init(log_level: str):
                 "class": "logging.StreamHandler",
                 "formatter": "default",
             },
+            "file": {
+                "class": "logging.handlers.RotatingFileHandler",
+                "formatter": "default",
+                "filename": "logs/vectordb_bench.log",
+                "maxBytes": 10485760,  # 10 MB
+                "backupCount": 5,
+                "encoding": "utf8",
+            },
         },
         "loggers": {
             "vectordb_bench": {
-                "handlers": ["console"],
+                "handlers": ["console", "file"],
                 "level": log_level,
                 "propagate": False,
             },
             "no_color": {
-                "handlers": ["no_color_console"],
+                "handlers": ["no_color_console", "file"],
                 "level": log_level,
                 "propagate": False,
             },
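The handler added here is the standard library's size-based rotator: once `vectordb_bench.log` exceeds 10 MB it rolls over, keeping at most five backups. A standalone equivalent of what the dictConfig entry sets up:

```python
# Standalone equivalent of the "file" handler configured above.
import logging.handlers
from pathlib import Path

Path("logs").mkdir(exist_ok=True)
handler = logging.handlers.RotatingFileHandler(
    "logs/vectordb_bench.log",
    maxBytes=10 * 1024 * 1024,  # rotate at 10 MB
    backupCount=5,              # keep vectordb_bench.log.1 ... .5
    encoding="utf8",
)
```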
vectordb_bench/models.py CHANGED
@@ -88,6 +88,10 @@ class CaseConfigParamType(Enum):
     numSearchThreads = "num_search_threads"
     maxNumPrefetchDatasets = "max_num_prefetch_datasets"
 
+    # mongodb params
+    mongodb_quantization_type = "quantization"
+    mongodb_num_candidates_ratio = "num_candidates_ratio"
+
 
 class CustomizedCase(BaseModel):
     pass
vectordb_bench-0.0.22.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: vectordb-bench
-Version: 0.0.21
+Version: 0.0.22
 Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
 Author-email: XuanYang-cn <xuan.yang@zilliz.com>
 Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -21,7 +21,7 @@ Requires-Dist: oss2
 Requires-Dist: psutil
 Requires-Dist: polars
 Requires-Dist: plotly
-Requires-Dist: environs
+Requires-Dist: environs<14.1.0
 Requires-Dist: pydantic<v2
 Requires-Dist: scikit-learn
 Requires-Dist: pymilvus
@@ -73,6 +73,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
 Provides-Extra: aliyun-opensearch
 Requires-Dist: alibabacloud_ha3engine_vector; extra == "aliyun-opensearch"
 Requires-Dist: alibabacloud_searchengine20211025; extra == "aliyun-opensearch"
+Provides-Extra: mongodb
+Requires-Dist: pymongo; extra == "mongodb"
 
 # VectorDBBench: A Benchmark Tool for VectorDB
 
@@ -89,6 +91,8 @@ Closely mimicking real-world production environments, we've set up diverse testi
 
 Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
 
+VectorDBBench is sponsored by Zilliz, the leading open-source vector database company behind Milvus. Choose smarter with VectorDBBench and start your free test on [Zilliz Cloud](https://zilliz.com/) today!
+
 **Leaderboard:** https://zilliz.com/benchmark
 ## Quick Start
 ### Prerequirement
@@ -128,6 +132,7 @@ All the database client supported
 | chromadb          | `pip install vectordb-bench[chromadb]` |
 | awsopensearch     | `pip install vectordb-bench[opensearch]` |
 | aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
+| mongodb           | `pip install vectordb-bench[mongodb]` |
 
 ### Run
 
@@ -228,6 +233,47 @@ Options:
                                   with-gt]
   --help                          Show this message and exit.
 ```
+
+### Run awsopensearch from command line
+
+```shell
+vectordbbench awsopensearch --db-label awsopensearch \
+    --m 16 --ef-construction 256 \
+    --host search-vector-db-prod-h4f6m4of6x7yp2rz7gdmots7w4.us-west-2.es.amazonaws.com --port 443 \
+    --user vector --password '<password>' \
+    --case-type Performance1536D5M --num-insert-workers 10 \
+    --skip-load --num-concurrency 75
+```
+
+To list the options for awsopensearch, execute `vectordbbench awsopensearch --help`
+
+```text
+$ vectordbbench awsopensearch --help
+Usage: vectordbbench awsopensearch [OPTIONS]
+
+Options:
+  # Sharding and Replication
+  --number-of-shards INTEGER      Number of primary shards for the index
+  --number-of-replicas INTEGER    Number of replica copies for each primary
+                                  shard
+  # Indexing Performance
+  --index-thread-qty INTEGER      Thread count for native engine indexing
+  --index-thread-qty-during-force-merge INTEGER
+                                  Thread count during force merge operations
+  --number-of-indexing-clients INTEGER
+                                  Number of concurrent indexing clients
+  # Index Management
+  --number-of-segments INTEGER    Target number of segments after merging
+  --refresh-interval TEXT         How often to make new data available for
+                                  search
+  --force-merge-enabled BOOLEAN   Whether to perform force merge operation
+  --flush-threshold-size TEXT     Size threshold for flushing the transaction
+                                  log
+  # Memory Management
+  --cb-threshold TEXT             k-NN memory circuit breaker threshold
+
+  --help                          Show this message and exit.
+```
+
 #### Using a configuration file.
 
 The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
@@ -394,6 +440,13 @@ We have strict requirements for the data set format, please follow them.
 - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
   - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
   - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
+    - We recommend limiting the number of test query vectors to around 1,000.
+      When conducting concurrent query tests, VectorDBBench creates a large number of processes.
+      To minimize additional communication overhead during testing,
+      we prepare a complete set of test queries for each process, allowing them to run independently.
+      However, this means that as the number of concurrent processes increases,
+      the number of copied query vectors also increases significantly,
+      which can place substantial pressure on memory resources.
   - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
 
 - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
vectordb_bench-0.0.22.dist-info/RECORD CHANGED
@@ -2,9 +2,9 @@ vectordb_bench/__init__.py,sha256=d5psAfISw9F6PFL2xPlSYUKKFDw7ifQm7g3LWC8_yUA,23
 vectordb_bench/__main__.py,sha256=cyYbVSU-zA1AgzneGKcRRuzR4ftRDr9sIi9Ei9NZnhI,858
 vectordb_bench/base.py,sha256=AgavIF0P9ku_RmCRk1KKziba-wI4ZpA2aJvjJzNhRSs,129
 vectordb_bench/interface.py,sha256=XaCjTgUeI17uVjsgOauPeVlkvnkuCyQOWyOaWhrgCt8,9811
-vectordb_bench/log_util.py,sha256=hOdK0TnrcpYZOrRZoBslievXSW8qtTvLvube43rxbVc,2776
+vectordb_bench/log_util.py,sha256=wDNaU_JBBOfKi_Z4vq7LDa0kOlLjoNNzDX3VZQn_Dxo,3239
 vectordb_bench/metric.py,sha256=pj-AxQHyIRHTaJY-wTIkTbC6TqEqMzt3kcEmMWEv71w,2063
-vectordb_bench/models.py,sha256=5N4-0lJLWpoR6NnzX4ONuH7vyi4nRFFuNS0q9jQ4cgM,11023
+vectordb_bench/models.py,sha256=1G6GDTtP0TbYT2Qhw_cJ84kx_Wkn4gq6V_EUVpCJW3Y,11150
 vectordb_bench/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vectordb_bench/backend/assembler.py,sha256=6GInRT7yBgfTaIPmo-XMkYX4pA8PJQmjMQInynwaunE,2047
 vectordb_bench/backend/cases.py,sha256=obDdY6g3p9Z2fog7qDwLLDuRMwo3LGQKMHsP66QZd2M,16296
@@ -13,7 +13,7 @@ vectordb_bench/backend/dataset.py,sha256=V4OKPt23v0kmdvgJwDr_R2fLJv3lXLZEii992cE
 vectordb_bench/backend/result_collector.py,sha256=mpROVdZ-HChKBVyMV5TZ5v7YGRb69bvfT7Gezn5F5sY,819
 vectordb_bench/backend/task_runner.py,sha256=vlaXB0_25-G9w1Lj-F0SrvJzhXT7ceDWGIb2aKRXukU,11488
 vectordb_bench/backend/utils.py,sha256=R6THuJdZhiQYSSJTqv0Uegl2B20taV_QjwvFrun2yxE,1949
-vectordb_bench/backend/clients/__init__.py,sha256=YvXoI8CS69WOTLl5n858xkRReeqiiRFBETZAAnD14qg,8212
+vectordb_bench/backend/clients/__init__.py,sha256=ncF4H4foep04EdpzQHQ6zqDxTL1cSvUmglt-v2E966g,8595
 vectordb_bench/backend/clients/api.py,sha256=uQaX_FiMFlD3z_91awUzB-qtBkvyDsMKE8ks5bBgJSY,6233
 vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py,sha256=7yPYaWoHeHNxDMtpReGXsdEPFD1e4vQblFor7TmLq5o,770
 vectordb_bench/backend/clients/aliyun_elasticsearch/config.py,sha256=d9RCgfCgauKvy6z9ig_wBormgwiGtkh8POyoHloHnJA,505
@@ -22,9 +22,9 @@ vectordb_bench/backend/clients/aliyun_opensearch/config.py,sha256=KSiuRu-p7oL2PE
 vectordb_bench/backend/clients/alloydb/alloydb.py,sha256=E24hxCUgpBCRiScdcS_iBk8n0wngUgVg8qujOWiUhw0,13009
 vectordb_bench/backend/clients/alloydb/cli.py,sha256=G6Q0WApoDXDG_pqmK2lEKFIvKB8qAsZFPM8TfsURydE,5086
 vectordb_bench/backend/clients/alloydb/config.py,sha256=PJs2wIJqwcG6UJ3T8R7Pi3xTMBfxTZiNkcWyhtHv5dc,5313
-vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py,sha256=PAFP5W6k0GxbpvvIFs8u3pMi7x-sLw9cm4mUsM2agws,7541
-vectordb_bench/backend/clients/aws_opensearch/cli.py,sha256=845dUYcD5m9j9FNOCXAspgCkvOKWWJXt2k87I55Odbs,1301
-vectordb_bench/backend/clients/aws_opensearch/config.py,sha256=XsQBKGBPR0lqH3XrQpijwgt9Tfb6KBVg35x2L9LQQMw,1881
+vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py,sha256=iRtPmHZoVTpQ-3Q90nE70zy_XsklGlSSNgBOgeAtVzU,10047
+vectordb_bench/backend/clients/aws_opensearch/cli.py,sha256=G086STCoaTBkz2J5Qt42bnyhmcYbhl6XxTaLfeirkXQ,4065
+vectordb_bench/backend/clients/aws_opensearch/config.py,sha256=9meXQUOVFlk3UOAhvBhaghNm7TasDsA6-fXOY8C9gzU,2295
 vectordb_bench/backend/clients/aws_opensearch/run.py,sha256=Ry5aAlielWjq0hx7LnbdShfOwzZhz3Gq9WYu5U43x9s,5001
 vectordb_bench/backend/clients/chroma/chroma.py,sha256=TGsmAnG5I3bbIjJ5L7ktke6fD8lOrx56Wt2tMCb3dY8,3609
 vectordb_bench/backend/clients/chroma/config.py,sha256=8nXpPdecQ5HrNqcsQwAVgacSz6uLgI-BI7v4tB8CeDk,347
@@ -36,6 +36,8 @@ vectordb_bench/backend/clients/memorydb/memorydb.py,sha256=WrZhDYJqpwN173sk2lmPn
 vectordb_bench/backend/clients/milvus/cli.py,sha256=xGvYYKOAs32vz78oB5Ks_xnWIMzcl_f7TPEPRk94FeQ,8895
 vectordb_bench/backend/clients/milvus/config.py,sha256=oFZ5VG5UHws161M1cYmMr2b9NSEoqwwst998T59QGQo,7520
 vectordb_bench/backend/clients/milvus/milvus.py,sha256=xdVVjMnBzD5KGJ7iUB-B3SuTL4JDW1UD15QBevExMLw,6862
+vectordb_bench/backend/clients/mongodb/config.py,sha256=7DZCh0bjPiqJW2luPypfpNeGfvKxVC4mdHLqgcjF1hA,1745
+vectordb_bench/backend/clients/mongodb/mongodb.py,sha256=ts2gpAzUTarpkfMFnM5ANi6T-xvcjS8kc4-apPt9jug,7225
 vectordb_bench/backend/clients/pgdiskann/cli.py,sha256=o5ddAp1Be2TOnm8Wh9IyIWUxdnw5N6v92Ms1s6CEwBo,3135
 vectordb_bench/backend/clients/pgdiskann/config.py,sha256=DBsVgLn4edl-irSlP_GV7KW-8jFemns_ujR_CuVnQtE,4412
 vectordb_bench/backend/clients/pgdiskann/pgdiskann.py,sha256=Z8K74Y6uMi6q8gnnD68doBxc5pWBSpRnNLDhlifseH4,12299
@@ -88,7 +90,7 @@ vectordb_bench/frontend/components/check_results/priceTable.py,sha256=K3NmlNKAb-
 vectordb_bench/frontend/components/check_results/stPageConfig.py,sha256=czkqr9NC3UQAxiz8KSCZC8cPmgSnFUhI2lOLHXfuMxo,432
 vectordb_bench/frontend/components/concurrent/charts.py,sha256=00WI8wxIdHAhnpmFJLd03n5U3LbowmeY4swVbGNzyYg,2874
 vectordb_bench/frontend/components/custom/displayCustomCase.py,sha256=aIWKFm13-EPG2XlJ3PWc2znR6q8A5FR93D5ZkGGncrM,1641
-vectordb_bench/frontend/components/custom/displaypPrams.py,sha256=GNs-awcbYAyOTgmnsFl_EmwjJHhD8EoN86a1-iQfCnc,1335
+vectordb_bench/frontend/components/custom/displaypPrams.py,sha256=mwm74_86YYRbpJ1Hz2Dba0eKvyzkK0DM7uhjBDFoElU,1910
 vectordb_bench/frontend/components/custom/getCustomConfig.py,sha256=tSPI2DPJSNxlArLcO5Kf9nhpIBc0_YE2QD9-1cbaLus,1031
 vectordb_bench/frontend/components/custom/initStyle.py,sha256=ortsoUNqH-vVq9ECiw80PnBEcIaUwxR1AQ65DSkBhGs,434
 vectordb_bench/frontend/components/get_results/saveAsImage.py,sha256=POaFiwKoCGqrY-zhanWC7-tubE64bV_JjqI4lgIuMts,1459
@@ -99,9 +101,9 @@ vectordb_bench/frontend/components/run_test/dbSelector.py,sha256=hzMEIL1DzvpP8xk
 vectordb_bench/frontend/components/run_test/generateTasks.py,sha256=3y8NHtWJMNqoP2SvoWuR7kj84g0OEg68IULebimzz7E,741
 vectordb_bench/frontend/components/run_test/hideSidebar.py,sha256=vb5kzIMmbMqWX67qFEHek21X4sGO_tPyn_uPqUEtp3Q,234
 vectordb_bench/frontend/components/run_test/initStyle.py,sha256=osPUgfFfH7rRlVNHSMumvmZxvKWlLxmZiNqgnMiUJEU,723
-vectordb_bench/frontend/components/run_test/submitTask.py,sha256=8Ka8n7eviZi56BXfcrpsUqhLcYUHgyif0zzQ2w2hHMw,3328
+vectordb_bench/frontend/components/run_test/submitTask.py,sha256=VZjkopkCBNhqLwGqsoM0hbPEeF6Q5UOQcdFUaegerxc,4094
 vectordb_bench/frontend/components/tables/data.py,sha256=5DdnC64BB7Aj2z9acht2atsPB4NabzQCZKALfIUnqtQ,1233
-vectordb_bench/frontend/config/dbCaseConfigs.py,sha256=cjFLhmHaNJKhPsAplKnjIO7ypZnlSw7S83N-WXI_gUQ,37781
+vectordb_bench/frontend/config/dbCaseConfigs.py,sha256=vHYdD3Tzi4NRD5ckJfkciIqZcMZY_Qslm_sUolKLxro,38597
 vectordb_bench/frontend/config/dbPrices.py,sha256=10aBKjVcEg8y7TPSda28opmBM1KmXNrvbU9WM_BsZcE,176
 vectordb_bench/frontend/config/styles.py,sha256=E2PmwmiewxBKJJ59hQ4ZXatqg8QTN-Z53JlsvWMHM2M,2291
 vectordb_bench/frontend/pages/concurrent.py,sha256=bvoSafRSIsRzBQkI3uBwwrdg8jnhRUQG-epZbrJhGiE,2082
@@ -127,9 +129,9 @@ vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json
 vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json,sha256=-Mdm4By65XDRCrmVOCF8yQXjcZtH4Xo4shcjoDoBUKU,18293
 vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json,sha256=77XlHT5zM_K7mG5HfDQKwXZnSCuR37VUbt6-P3J_amI,15737
 vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json,sha256=TualfJ0664Hs-vdIW68bdkqAEYyzotXmu2P0yIN-GHk,42526
-vectordb_bench-0.0.21.dist-info/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
-vectordb_bench-0.0.21.dist-info/METADATA,sha256=SDCFG-7cwQRoLe3mrvjCQNslK1Ju8aw0VQ_Kc8408hw,34577
-vectordb_bench-0.0.21.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-vectordb_bench-0.0.21.dist-info/entry_points.txt,sha256=Qzw6gVx96ui8esG21H6yHsI6nboEohRmV424TYhQNrA,113
-vectordb_bench-0.0.21.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
-vectordb_bench-0.0.21.dist-info/RECORD,,
+vectordb_bench-0.0.22.dist-info/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
+vectordb_bench-0.0.22.dist-info/METADATA,sha256=fSJ2cTKb1oLQcXYVQc8cTHoMkEqOelusy0a_VEPuvPo,37166
+vectordb_bench-0.0.22.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+vectordb_bench-0.0.22.dist-info/entry_points.txt,sha256=Qzw6gVx96ui8esG21H6yHsI6nboEohRmV424TYhQNrA,113
+vectordb_bench-0.0.22.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
+vectordb_bench-0.0.22.dist-info/RECORD,,