vectordb-bench 0.0.21__py3-none-any.whl → 0.0.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/backend/clients/__init__.py +16 -0
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +53 -4
- vectordb_bench/backend/clients/aws_opensearch/cli.py +85 -1
- vectordb_bench/backend/clients/aws_opensearch/config.py +10 -0
- vectordb_bench/backend/clients/mongodb/config.py +53 -0
- vectordb_bench/backend/clients/mongodb/mongodb.py +200 -0
- vectordb_bench/frontend/components/custom/displaypPrams.py +12 -1
- vectordb_bench/frontend/components/run_test/submitTask.py +20 -3
- vectordb_bench/frontend/config/dbCaseConfigs.py +32 -0
- vectordb_bench/log_util.py +15 -2
- vectordb_bench/models.py +4 -0
- {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.22.dist-info}/METADATA +55 -2
- {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.22.dist-info}/RECORD +17 -15
- {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.22.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.22.dist-info}/WHEEL +0 -0
- {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.22.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.22.dist-info}/top_level.txt +0 -0
@@ -40,6 +40,7 @@ class DB(Enum):
|
|
40
40
|
AliyunElasticsearch = "AliyunElasticsearch"
|
41
41
|
Test = "test"
|
42
42
|
AliyunOpenSearch = "AliyunOpenSearch"
|
43
|
+
MongoDB = "MongoDB"
|
43
44
|
|
44
45
|
@property
|
45
46
|
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901
|
@@ -129,6 +130,11 @@ class DB(Enum):
|
|
129
130
|
|
130
131
|
return AliyunOpenSearch
|
131
132
|
|
133
|
+
if self == DB.MongoDB:
|
134
|
+
from .mongodb.mongodb import MongoDB
|
135
|
+
|
136
|
+
return MongoDB
|
137
|
+
|
132
138
|
if self == DB.Test:
|
133
139
|
from .test.test import Test
|
134
140
|
|
@@ -225,6 +231,11 @@ class DB(Enum):
|
|
225
231
|
|
226
232
|
return AliyunOpenSearchConfig
|
227
233
|
|
234
|
+
if self == DB.MongoDB:
|
235
|
+
from .mongodb.config import MongoDBConfig
|
236
|
+
|
237
|
+
return MongoDBConfig
|
238
|
+
|
228
239
|
if self == DB.Test:
|
229
240
|
from .test.config import TestConfig
|
230
241
|
|
@@ -302,6 +313,11 @@ class DB(Enum):
|
|
302
313
|
|
303
314
|
return AliyunOpenSearchIndexConfig
|
304
315
|
|
316
|
+
if self == DB.MongoDB:
|
317
|
+
from .mongodb.config import MongoDBIndexConfig
|
318
|
+
|
319
|
+
return MongoDBIndexConfig
|
320
|
+
|
305
321
|
# DB.Pinecone, DB.Chroma, DB.Redis
|
306
322
|
return EmptyDBCaseConfig
|
307
323
|
|
@@ -12,6 +12,7 @@ log = logging.getLogger(__name__)
|
|
12
12
|
|
13
13
|
WAITING_FOR_REFRESH_SEC = 30
|
14
14
|
WAITING_FOR_FORCE_MERGE_SEC = 30
|
15
|
+
SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC = 30
|
15
16
|
|
16
17
|
|
17
18
|
class AWSOpenSearch(VectorDB):
|
@@ -52,10 +53,27 @@ class AWSOpenSearch(VectorDB):
|
|
52
53
|
return AWSOpenSearchIndexConfig
|
53
54
|
|
54
55
|
def _create_index(self, client: OpenSearch):
|
56
|
+
cluster_settings_body = {
|
57
|
+
"persistent": {
|
58
|
+
"knn.algo_param.index_thread_qty": self.case_config.index_thread_qty,
|
59
|
+
"knn.memory.circuit_breaker.limit": self.case_config.cb_threshold,
|
60
|
+
}
|
61
|
+
}
|
62
|
+
client.cluster.put_settings(cluster_settings_body)
|
55
63
|
settings = {
|
56
64
|
"index": {
|
57
65
|
"knn": True,
|
66
|
+
"number_of_shards": self.case_config.number_of_shards,
|
67
|
+
"number_of_replicas": 0,
|
68
|
+
"translog.flush_threshold_size": self.case_config.flush_threshold_size,
|
69
|
+
# Setting trans log threshold to 5GB
|
70
|
+
**(
|
71
|
+
{"knn.algo_param.ef_search": self.case_config.ef_search}
|
72
|
+
if self.case_config.engine == AWSOS_Engine.nmslib
|
73
|
+
else {}
|
74
|
+
),
|
58
75
|
},
|
76
|
+
"refresh_interval": self.case_config.refresh_interval,
|
59
77
|
}
|
60
78
|
mappings = {
|
61
79
|
"properties": {
|
@@ -145,9 +163,9 @@ class AWSOpenSearch(VectorDB):
|
|
145
163
|
docvalue_fields=[self.id_col_name],
|
146
164
|
stored_fields="_none_",
|
147
165
|
)
|
148
|
-
log.
|
149
|
-
log.
|
150
|
-
log.
|
166
|
+
log.debug(f"Search took: {resp['took']}")
|
167
|
+
log.debug(f"Search shards: {resp['_shards']}")
|
168
|
+
log.debug(f"Search hits total: {resp['hits']['total']}")
|
151
169
|
return [int(h["fields"][self.id_col_name][0]) for h in resp["hits"]["hits"]]
|
152
170
|
except Exception as e:
|
153
171
|
log.warning(f"Failed to search: {self.index_name} error: {e!s}")
|
@@ -157,12 +175,37 @@ class AWSOpenSearch(VectorDB):
|
|
157
175
|
"""optimize will be called between insertion and search in performance cases."""
|
158
176
|
# Call refresh first to ensure that all segments are created
|
159
177
|
self._refresh_index()
|
160
|
-
self.
|
178
|
+
if self.case_config.force_merge_enabled:
|
179
|
+
self._do_force_merge()
|
180
|
+
self._refresh_index()
|
181
|
+
self._update_replicas()
|
161
182
|
# Call refresh again to ensure that the index is ready after force merge.
|
162
183
|
self._refresh_index()
|
163
184
|
# ensure that all graphs are loaded in memory and ready for search
|
164
185
|
self._load_graphs_to_memory()
|
165
186
|
|
187
|
+
def _update_replicas(self):
|
188
|
+
index_settings = self.client.indices.get_settings(index=self.index_name)
|
189
|
+
current_number_of_replicas = int(index_settings[self.index_name]["settings"]["index"]["number_of_replicas"])
|
190
|
+
log.info(
|
191
|
+
f"Current Number of replicas are {current_number_of_replicas}"
|
192
|
+
f" and changing the replicas to {self.case_config.number_of_replicas}"
|
193
|
+
)
|
194
|
+
settings_body = {"index": {"number_of_replicas": self.case_config.number_of_replicas}}
|
195
|
+
self.client.indices.put_settings(index=self.index_name, body=settings_body)
|
196
|
+
self._wait_till_green()
|
197
|
+
|
198
|
+
def _wait_till_green(self):
|
199
|
+
log.info("Wait for index to become green..")
|
200
|
+
while True:
|
201
|
+
res = self.client.cat.indices(index=self.index_name, h="health", format="json")
|
202
|
+
health = res[0]["health"]
|
203
|
+
if health != "green":
|
204
|
+
break
|
205
|
+
log.info(f"The index {self.index_name} has health : {health} and is not green. Retrying")
|
206
|
+
time.sleep(SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC)
|
207
|
+
log.info(f"Index {self.index_name} is green..")
|
208
|
+
|
166
209
|
def _refresh_index(self):
|
167
210
|
log.debug(f"Starting refresh for index {self.index_name}")
|
168
211
|
while True:
|
@@ -179,6 +222,12 @@ class AWSOpenSearch(VectorDB):
|
|
179
222
|
log.debug(f"Completed refresh for index {self.index_name}")
|
180
223
|
|
181
224
|
def _do_force_merge(self):
|
225
|
+
log.info(f"Updating the Index thread qty to {self.case_config.index_thread_qty_during_force_merge}.")
|
226
|
+
|
227
|
+
cluster_settings_body = {
|
228
|
+
"persistent": {"knn.algo_param.index_thread_qty": self.case_config.index_thread_qty_during_force_merge}
|
229
|
+
}
|
230
|
+
self.client.cluster.put_settings(cluster_settings_body)
|
182
231
|
log.debug(f"Starting force merge for index {self.index_name}")
|
183
232
|
force_merge_endpoint = f"/{self.index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false"
|
184
233
|
force_merge_task_id = self.client.transport.perform_request("POST", force_merge_endpoint)["task"]
|
@@ -18,6 +18,79 @@ class AWSOpenSearchTypedDict(TypedDict):
|
|
18
18
|
port: Annotated[int, click.option("--port", type=int, default=443, help="Db Port")]
|
19
19
|
user: Annotated[str, click.option("--user", type=str, default="admin", help="Db User")]
|
20
20
|
password: Annotated[str, click.option("--password", type=str, help="Db password")]
|
21
|
+
number_of_shards: Annotated[
|
22
|
+
int,
|
23
|
+
click.option("--number-of-shards", type=int, help="Number of primary shards for the index", default=1),
|
24
|
+
]
|
25
|
+
number_of_replicas: Annotated[
|
26
|
+
int,
|
27
|
+
click.option(
|
28
|
+
"--number-of-replicas", type=int, help="Number of replica copies for each primary shard", default=1
|
29
|
+
),
|
30
|
+
]
|
31
|
+
index_thread_qty: Annotated[
|
32
|
+
int,
|
33
|
+
click.option(
|
34
|
+
"--index-thread-qty",
|
35
|
+
type=int,
|
36
|
+
help="Thread count for native engine indexing",
|
37
|
+
default=4,
|
38
|
+
),
|
39
|
+
]
|
40
|
+
|
41
|
+
index_thread_qty_during_force_merge: Annotated[
|
42
|
+
int,
|
43
|
+
click.option(
|
44
|
+
"--index-thread-qty-during-force-merge",
|
45
|
+
type=int,
|
46
|
+
help="Thread count during force merge operations",
|
47
|
+
default=4,
|
48
|
+
),
|
49
|
+
]
|
50
|
+
|
51
|
+
number_of_indexing_clients: Annotated[
|
52
|
+
int,
|
53
|
+
click.option(
|
54
|
+
"--number-of-indexing-clients",
|
55
|
+
type=int,
|
56
|
+
help="Number of concurrent indexing clients",
|
57
|
+
default=1,
|
58
|
+
),
|
59
|
+
]
|
60
|
+
|
61
|
+
number_of_segments: Annotated[
|
62
|
+
int,
|
63
|
+
click.option("--number-of-segments", type=int, help="Target number of segments after merging", default=1),
|
64
|
+
]
|
65
|
+
|
66
|
+
refresh_interval: Annotated[
|
67
|
+
int,
|
68
|
+
click.option(
|
69
|
+
"--refresh-interval", type=str, help="How often to make new data available for search", default="60s"
|
70
|
+
),
|
71
|
+
]
|
72
|
+
|
73
|
+
force_merge_enabled: Annotated[
|
74
|
+
int,
|
75
|
+
click.option("--force-merge-enabled", type=bool, help="Whether to perform force merge operation", default=True),
|
76
|
+
]
|
77
|
+
|
78
|
+
flush_threshold_size: Annotated[
|
79
|
+
int,
|
80
|
+
click.option(
|
81
|
+
"--flush-threshold-size", type=str, help="Size threshold for flushing the transaction log", default="5120mb"
|
82
|
+
),
|
83
|
+
]
|
84
|
+
|
85
|
+
cb_threshold: Annotated[
|
86
|
+
int,
|
87
|
+
click.option(
|
88
|
+
"--cb-threshold",
|
89
|
+
type=str,
|
90
|
+
help="k-NN Memory circuit breaker threshold",
|
91
|
+
default="50%",
|
92
|
+
),
|
93
|
+
]
|
21
94
|
|
22
95
|
|
23
96
|
class AWSOpenSearchHNSWTypedDict(CommonTypedDict, AWSOpenSearchTypedDict, HNSWFlavor2): ...
|
@@ -36,6 +109,17 @@ def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
|
|
36
109
|
user=parameters["user"],
|
37
110
|
password=SecretStr(parameters["password"]),
|
38
111
|
),
|
39
|
-
db_case_config=AWSOpenSearchIndexConfig(
|
112
|
+
db_case_config=AWSOpenSearchIndexConfig(
|
113
|
+
number_of_shards=parameters["number_of_shards"],
|
114
|
+
number_of_replicas=parameters["number_of_replicas"],
|
115
|
+
index_thread_qty=parameters["index_thread_qty"],
|
116
|
+
number_of_segments=parameters["number_of_segments"],
|
117
|
+
refresh_interval=parameters["refresh_interval"],
|
118
|
+
force_merge_enabled=parameters["force_merge_enabled"],
|
119
|
+
flush_threshold_size=parameters["flush_threshold_size"],
|
120
|
+
number_of_indexing_clients=parameters["number_of_indexing_clients"],
|
121
|
+
index_thread_qty_during_force_merge=parameters["index_thread_qty_during_force_merge"],
|
122
|
+
cb_threshold=parameters["cb_threshold"],
|
123
|
+
),
|
40
124
|
**parameters,
|
41
125
|
)
|
@@ -39,6 +39,16 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
|
|
39
39
|
efConstruction: int = 256
|
40
40
|
efSearch: int = 256
|
41
41
|
M: int = 16
|
42
|
+
index_thread_qty: int | None = 4
|
43
|
+
number_of_shards: int | None = 1
|
44
|
+
number_of_replicas: int | None = 0
|
45
|
+
number_of_segments: int | None = 1
|
46
|
+
refresh_interval: str | None = "60s"
|
47
|
+
force_merge_enabled: bool | None = True
|
48
|
+
flush_threshold_size: str | None = "5120mb"
|
49
|
+
number_of_indexing_clients: int | None = 1
|
50
|
+
index_thread_qty_during_force_merge: int
|
51
|
+
cb_threshold: str | None = "50%"
|
42
52
|
|
43
53
|
def parse_metric(self) -> str:
|
44
54
|
if self.metric_type == MetricType.IP:
|
@@ -0,0 +1,53 @@
|
|
1
|
+
from enum import Enum
|
2
|
+
|
3
|
+
from pydantic import BaseModel, SecretStr
|
4
|
+
|
5
|
+
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
|
6
|
+
|
7
|
+
|
8
|
+
class QuantizationType(Enum):
|
9
|
+
NONE = "none"
|
10
|
+
BINARY = "binary"
|
11
|
+
SCALAR = "scalar"
|
12
|
+
|
13
|
+
|
14
|
+
class MongoDBConfig(DBConfig, BaseModel):
|
15
|
+
connection_string: SecretStr = "mongodb+srv://<user>:<password>@<cluster_name>.heatl.mongodb.net"
|
16
|
+
database: str = "vdb_bench"
|
17
|
+
|
18
|
+
def to_dict(self) -> dict:
|
19
|
+
return {
|
20
|
+
"connection_string": self.connection_string.get_secret_value(),
|
21
|
+
"database": self.database,
|
22
|
+
}
|
23
|
+
|
24
|
+
|
25
|
+
class MongoDBIndexConfig(BaseModel, DBCaseConfig):
|
26
|
+
index: IndexType = IndexType.HNSW # MongoDB uses HNSW for vector search
|
27
|
+
metric_type: MetricType = MetricType.COSINE
|
28
|
+
num_candidates_ratio: int = 10 # Default numCandidates ratio for vector search
|
29
|
+
quantization: QuantizationType = QuantizationType.NONE # Quantization type if applicable
|
30
|
+
|
31
|
+
def parse_metric(self) -> str:
|
32
|
+
if self.metric_type == MetricType.L2:
|
33
|
+
return "euclidean"
|
34
|
+
if self.metric_type == MetricType.IP:
|
35
|
+
return "dotProduct"
|
36
|
+
return "cosine" # Default to cosine similarity
|
37
|
+
|
38
|
+
def index_param(self) -> dict:
|
39
|
+
return {
|
40
|
+
"type": "vectorSearch",
|
41
|
+
"fields": [
|
42
|
+
{
|
43
|
+
"type": "vector",
|
44
|
+
"similarity": self.parse_metric(),
|
45
|
+
"numDimensions": None, # Will be set in MongoDB class
|
46
|
+
"path": "vector", # Vector field name
|
47
|
+
"quantization": self.quantization.value,
|
48
|
+
}
|
49
|
+
],
|
50
|
+
}
|
51
|
+
|
52
|
+
def search_param(self) -> dict:
|
53
|
+
return {"num_candidates_ratio": self.num_candidates_ratio}
|
@@ -0,0 +1,200 @@
|
|
1
|
+
import logging
|
2
|
+
import time
|
3
|
+
from contextlib import contextmanager
|
4
|
+
|
5
|
+
from pymongo import MongoClient
|
6
|
+
from pymongo.operations import SearchIndexModel
|
7
|
+
|
8
|
+
from ..api import VectorDB
|
9
|
+
from .config import MongoDBIndexConfig
|
10
|
+
|
11
|
+
log = logging.getLogger(__name__)
|
12
|
+
|
13
|
+
|
14
|
+
class MongoDBError(Exception):
|
15
|
+
"""Custom exception class for MongoDB client errors."""
|
16
|
+
|
17
|
+
|
18
|
+
class MongoDB(VectorDB):
|
19
|
+
def __init__(
|
20
|
+
self,
|
21
|
+
dim: int,
|
22
|
+
db_config: dict,
|
23
|
+
db_case_config: MongoDBIndexConfig,
|
24
|
+
collection_name: str = "vdb_bench_collection",
|
25
|
+
id_field: str = "id",
|
26
|
+
vector_field: str = "vector",
|
27
|
+
drop_old: bool = False,
|
28
|
+
**kwargs,
|
29
|
+
):
|
30
|
+
self.dim = dim
|
31
|
+
self.db_config = db_config
|
32
|
+
self.case_config = db_case_config
|
33
|
+
self.collection_name = collection_name
|
34
|
+
self.id_field = id_field
|
35
|
+
self.vector_field = vector_field
|
36
|
+
self.drop_old = drop_old
|
37
|
+
|
38
|
+
# Update index dimensions
|
39
|
+
index_params = self.case_config.index_param()
|
40
|
+
log.info(f"index params: {index_params}")
|
41
|
+
index_params["fields"][0]["numDimensions"] = dim
|
42
|
+
self.index_params = index_params
|
43
|
+
|
44
|
+
# Initialize - they'll also be set in init()
|
45
|
+
uri = self.db_config["connection_string"]
|
46
|
+
self.client = MongoClient(uri)
|
47
|
+
self.db = self.client[self.db_config["database"]]
|
48
|
+
self.collection = self.db[self.collection_name]
|
49
|
+
if self.drop_old and self.collection_name in self.db.list_collection_names():
|
50
|
+
log.info(f"MongoDB client dropping old collection: {self.collection_name}")
|
51
|
+
self.db.drop_collection(self.collection_name)
|
52
|
+
self.client = None
|
53
|
+
self.db = None
|
54
|
+
self.collection = None
|
55
|
+
|
56
|
+
@contextmanager
|
57
|
+
def init(self):
|
58
|
+
"""Initialize MongoDB client and cleanup when done"""
|
59
|
+
try:
|
60
|
+
uri = self.db_config["connection_string"]
|
61
|
+
self.client = MongoClient(uri)
|
62
|
+
self.db = self.client[self.db_config["database"]]
|
63
|
+
self.collection = self.db[self.collection_name]
|
64
|
+
|
65
|
+
yield
|
66
|
+
finally:
|
67
|
+
if self.client is not None:
|
68
|
+
self.client.close()
|
69
|
+
self.client = None
|
70
|
+
self.db = None
|
71
|
+
self.collection = None
|
72
|
+
|
73
|
+
def _create_index(self) -> None:
|
74
|
+
"""Create vector search index"""
|
75
|
+
index_name = "vector_index"
|
76
|
+
index_params = self.index_params
|
77
|
+
log.info(f"index params {index_params}")
|
78
|
+
# drop index if already exists
|
79
|
+
if self.collection.list_indexes():
|
80
|
+
all_indexes = self.collection.list_search_indexes()
|
81
|
+
if any(idx.get("name") == index_name for idx in all_indexes):
|
82
|
+
log.info(f"Drop index: {index_name}")
|
83
|
+
try:
|
84
|
+
self.collection.drop_search_index(index_name)
|
85
|
+
while True:
|
86
|
+
indices = list(self.collection.list_search_indexes())
|
87
|
+
indices = [idx for idx in indices if idx["name"] == index_name]
|
88
|
+
log.debug(f"index status {indices}")
|
89
|
+
if len(indices) == 0:
|
90
|
+
break
|
91
|
+
log.info(f"index deleting {indices}")
|
92
|
+
except Exception:
|
93
|
+
log.exception(f"Error dropping index {index_name}")
|
94
|
+
try:
|
95
|
+
# Create vector search index
|
96
|
+
search_index = SearchIndexModel(definition=index_params, name=index_name, type="vectorSearch")
|
97
|
+
|
98
|
+
self.collection.create_search_index(search_index)
|
99
|
+
log.info(f"Created vector search index: {index_name}")
|
100
|
+
self._wait_for_index_ready(index_name)
|
101
|
+
|
102
|
+
# Create regular index on id field for faster lookups
|
103
|
+
self.collection.create_index(self.id_field)
|
104
|
+
log.info(f"Created index on {self.id_field} field")
|
105
|
+
|
106
|
+
except Exception:
|
107
|
+
log.exception(f"Error creating index {index_name}")
|
108
|
+
raise
|
109
|
+
|
110
|
+
def _wait_for_index_ready(self, index_name: str, check_interval: int = 5) -> None:
|
111
|
+
"""Wait for index to be ready"""
|
112
|
+
while True:
|
113
|
+
indices = list(self.collection.list_search_indexes())
|
114
|
+
log.debug(f"index status {indices}")
|
115
|
+
if indices and any(idx.get("name") == index_name and idx.get("queryable") for idx in indices):
|
116
|
+
break
|
117
|
+
for idx in indices:
|
118
|
+
if idx.get("name") == index_name and idx.get("status") == "FAILED":
|
119
|
+
error_msg = f"Index {index_name} failed to build"
|
120
|
+
raise MongoDBError(error_msg)
|
121
|
+
|
122
|
+
time.sleep(check_interval)
|
123
|
+
log.info(f"Index {index_name} is ready")
|
124
|
+
|
125
|
+
def need_normalize_cosine(self) -> bool:
|
126
|
+
return False
|
127
|
+
|
128
|
+
def insert_embeddings(
|
129
|
+
self,
|
130
|
+
embeddings: list[list[float]],
|
131
|
+
metadata: list[int],
|
132
|
+
**kwargs,
|
133
|
+
) -> (int, Exception | None):
|
134
|
+
"""Insert embeddings into MongoDB"""
|
135
|
+
|
136
|
+
# Prepare documents in bulk
|
137
|
+
documents = [
|
138
|
+
{
|
139
|
+
self.id_field: id_,
|
140
|
+
self.vector_field: embedding,
|
141
|
+
}
|
142
|
+
for id_, embedding in zip(metadata, embeddings, strict=False)
|
143
|
+
]
|
144
|
+
|
145
|
+
# Use ordered=False for better insert performance
|
146
|
+
try:
|
147
|
+
self.collection.insert_many(documents, ordered=False)
|
148
|
+
except Exception as e:
|
149
|
+
return 0, e
|
150
|
+
return len(documents), None
|
151
|
+
|
152
|
+
def search_embedding(
|
153
|
+
self,
|
154
|
+
query: list[float],
|
155
|
+
k: int = 100,
|
156
|
+
filters: dict | None = None,
|
157
|
+
**kwargs,
|
158
|
+
) -> list[int]:
|
159
|
+
"""Search for similar vectors"""
|
160
|
+
search_params = self.case_config.search_param()
|
161
|
+
|
162
|
+
vector_search = {"queryVector": query, "index": "vector_index", "path": self.vector_field, "limit": k}
|
163
|
+
|
164
|
+
# Add exact search parameter if specified
|
165
|
+
if search_params["exact"]:
|
166
|
+
vector_search["exact"] = True
|
167
|
+
else:
|
168
|
+
# Set numCandidates based on k value and data size
|
169
|
+
# For 50K dataset, use higher multiplier for better recall
|
170
|
+
num_candidates = min(10000, k * search_params["num_candidates_ratio"])
|
171
|
+
vector_search["numCandidates"] = num_candidates
|
172
|
+
|
173
|
+
# Add filter if specified
|
174
|
+
if filters:
|
175
|
+
log.info(f"Applying filter: {filters}")
|
176
|
+
vector_search["filter"] = {
|
177
|
+
"id": {"gte": filters["id"]},
|
178
|
+
}
|
179
|
+
pipeline = [
|
180
|
+
{"$vectorSearch": vector_search},
|
181
|
+
{
|
182
|
+
"$project": {
|
183
|
+
"_id": 0,
|
184
|
+
self.id_field: 1,
|
185
|
+
"score": {"$meta": "vectorSearchScore"}, # Include similarity score
|
186
|
+
}
|
187
|
+
},
|
188
|
+
]
|
189
|
+
|
190
|
+
results = list(self.collection.aggregate(pipeline))
|
191
|
+
return [doc[self.id_field] for doc in results]
|
192
|
+
|
193
|
+
def optimize(self, data_size: int | None = None) -> None:
|
194
|
+
"""MongoDB vector search indexes are self-optimizing"""
|
195
|
+
log.info("optimize for search")
|
196
|
+
self._create_index()
|
197
|
+
self._wait_for_index_ready("vector_index")
|
198
|
+
|
199
|
+
def ready_to_load(self) -> None:
|
200
|
+
"""MongoDB is always ready to load"""
|
@@ -3,7 +3,7 @@ def displayParams(st):
|
|
3
3
|
"""
|
4
4
|
- `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
|
5
5
|
- Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
6
|
-
- Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
6
|
+
- Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
7
7
|
- Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
|
8
8
|
|
9
9
|
- `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
|
@@ -11,3 +11,14 @@ def displayParams(st):
|
|
11
11
|
- `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
|
12
12
|
"""
|
13
13
|
)
|
14
|
+
st.caption(
|
15
|
+
"""We recommend limiting the number of test query vectors, like 1,000.""",
|
16
|
+
help="""
|
17
|
+
When conducting concurrent query tests, Vdbbench creates a large number of processes.
|
18
|
+
To minimize additional communication overhead during testing,
|
19
|
+
we prepare a complete set of test queries for each process, allowing them to run independently.\n
|
20
|
+
However, this means that as the number of concurrent processes increases,
|
21
|
+
the number of copied query vectors also increases significantly,
|
22
|
+
which can place substantial pressure on memory resources.
|
23
|
+
""",
|
24
|
+
)
|
@@ -1,6 +1,8 @@
|
|
1
1
|
from datetime import datetime
|
2
|
+
from vectordb_bench import config
|
2
3
|
from vectordb_bench.frontend.config import styles
|
3
4
|
from vectordb_bench.interface import benchmark_runner
|
5
|
+
from vectordb_bench.models import TaskConfig
|
4
6
|
|
5
7
|
|
6
8
|
def submitTask(st, tasks, isAllValid):
|
@@ -47,16 +49,31 @@ def advancedSettings(st):
|
|
47
49
|
k = container[0].number_input("k", min_value=1, value=100, label_visibility="collapsed")
|
48
50
|
container[1].caption("K value for number of nearest neighbors to search")
|
49
51
|
|
50
|
-
|
52
|
+
container = st.columns([1, 2])
|
53
|
+
defaultconcurrentInput = ",".join(map(str, config.NUM_CONCURRENCY))
|
54
|
+
concurrentInput = container[0].text_input(
|
55
|
+
"Concurrent Input", value=defaultconcurrentInput, label_visibility="collapsed"
|
56
|
+
)
|
57
|
+
container[1].caption("num of concurrencies for search tests to get max-qps")
|
58
|
+
return index_already_exists, use_aliyun, k, concurrentInput
|
51
59
|
|
52
60
|
|
53
|
-
def controlPanel(st, tasks, taskLabel, isAllValid):
|
54
|
-
index_already_exists, use_aliyun, k = advancedSettings(st)
|
61
|
+
def controlPanel(st, tasks: list[TaskConfig], taskLabel, isAllValid):
|
62
|
+
index_already_exists, use_aliyun, k, concurrentInput = advancedSettings(st)
|
55
63
|
|
56
64
|
def runHandler():
|
57
65
|
benchmark_runner.set_drop_old(not index_already_exists)
|
66
|
+
|
67
|
+
try:
|
68
|
+
concurrentInput_list = [int(item.strip()) for item in concurrentInput.split(",")]
|
69
|
+
except ValueError:
|
70
|
+
st.write("please input correct number")
|
71
|
+
return None
|
72
|
+
|
58
73
|
for task in tasks:
|
59
74
|
task.case_config.k = k
|
75
|
+
task.case_config.concurrency_search_config.num_concurrency = concurrentInput_list
|
76
|
+
|
60
77
|
benchmark_runner.set_download_address(use_aliyun)
|
61
78
|
benchmark_runner.run(tasks, taskLabel)
|
62
79
|
|
@@ -1041,6 +1041,26 @@ CaseConfigParamInput_NumCandidates_AliES = CaseConfigInput(
|
|
1041
1041
|
)
|
1042
1042
|
|
1043
1043
|
|
1044
|
+
CaseConfigParamInput_MongoDBQuantizationType = CaseConfigInput(
|
1045
|
+
label=CaseConfigParamType.mongodb_quantization_type,
|
1046
|
+
inputType=InputType.Option,
|
1047
|
+
inputConfig={
|
1048
|
+
"options": ["none", "scalar", "binary"],
|
1049
|
+
},
|
1050
|
+
)
|
1051
|
+
|
1052
|
+
|
1053
|
+
CaseConfigParamInput_MongoDBNumCandidatesRatio = CaseConfigInput(
|
1054
|
+
label=CaseConfigParamType.mongodb_num_candidates_ratio,
|
1055
|
+
inputType=InputType.Number,
|
1056
|
+
inputConfig={
|
1057
|
+
"min": 10,
|
1058
|
+
"max": 20,
|
1059
|
+
"value": 10,
|
1060
|
+
},
|
1061
|
+
)
|
1062
|
+
|
1063
|
+
|
1044
1064
|
MilvusLoadConfig = [
|
1045
1065
|
CaseConfigParamInput_IndexType,
|
1046
1066
|
CaseConfigParamInput_M,
|
@@ -1224,6 +1244,14 @@ AliyunElasticsearchPerformanceConfig = [
|
|
1224
1244
|
CaseConfigParamInput_NumCandidates_AliES,
|
1225
1245
|
]
|
1226
1246
|
|
1247
|
+
MongoDBLoadingConfig = [
|
1248
|
+
CaseConfigParamInput_MongoDBQuantizationType,
|
1249
|
+
]
|
1250
|
+
MongoDBPerformanceConfig = [
|
1251
|
+
CaseConfigParamInput_MongoDBQuantizationType,
|
1252
|
+
CaseConfigParamInput_MongoDBNumCandidatesRatio,
|
1253
|
+
]
|
1254
|
+
|
1227
1255
|
CASE_CONFIG_MAP = {
|
1228
1256
|
DB.Milvus: {
|
1229
1257
|
CaseLabel.Load: MilvusLoadConfig,
|
@@ -1272,4 +1300,8 @@ CASE_CONFIG_MAP = {
|
|
1272
1300
|
CaseLabel.Load: AliyunOpensearchLoadingConfig,
|
1273
1301
|
CaseLabel.Performance: AliyunOpenSearchPerformanceConfig,
|
1274
1302
|
},
|
1303
|
+
DB.MongoDB: {
|
1304
|
+
CaseLabel.Load: MongoDBLoadingConfig,
|
1305
|
+
CaseLabel.Performance: MongoDBPerformanceConfig,
|
1306
|
+
},
|
1275
1307
|
}
|
vectordb_bench/log_util.py
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
import logging
|
2
2
|
from logging import config
|
3
|
+
from pathlib import Path
|
3
4
|
|
4
5
|
|
5
6
|
def init(log_level: str):
|
7
|
+
# Create logs directory if it doesn't exist
|
8
|
+
log_dir = Path("logs")
|
9
|
+
log_dir.mkdir(exist_ok=True)
|
10
|
+
|
6
11
|
log_config = {
|
7
12
|
"version": 1,
|
8
13
|
"disable_existing_loggers": False,
|
@@ -24,15 +29,23 @@ def init(log_level: str):
|
|
24
29
|
"class": "logging.StreamHandler",
|
25
30
|
"formatter": "default",
|
26
31
|
},
|
32
|
+
"file": {
|
33
|
+
"class": "logging.handlers.RotatingFileHandler",
|
34
|
+
"formatter": "default",
|
35
|
+
"filename": "logs/vectordb_bench.log",
|
36
|
+
"maxBytes": 10485760, # 10MB
|
37
|
+
"backupCount": 5,
|
38
|
+
"encoding": "utf8",
|
39
|
+
},
|
27
40
|
},
|
28
41
|
"loggers": {
|
29
42
|
"vectordb_bench": {
|
30
|
-
"handlers": ["console"],
|
43
|
+
"handlers": ["console", "file"],
|
31
44
|
"level": log_level,
|
32
45
|
"propagate": False,
|
33
46
|
},
|
34
47
|
"no_color": {
|
35
|
-
"handlers": ["no_color_console"],
|
48
|
+
"handlers": ["no_color_console", "file"],
|
36
49
|
"level": log_level,
|
37
50
|
"propagate": False,
|
38
51
|
},
|
vectordb_bench/models.py
CHANGED
@@ -88,6 +88,10 @@ class CaseConfigParamType(Enum):
|
|
88
88
|
numSearchThreads = "num_search_threads"
|
89
89
|
maxNumPrefetchDatasets = "max_num_prefetch_datasets"
|
90
90
|
|
91
|
+
# mongodb params
|
92
|
+
mongodb_quantization_type = "quantization"
|
93
|
+
mongodb_num_candidates_ratio = "num_candidates_ratio"
|
94
|
+
|
91
95
|
|
92
96
|
class CustomizedCase(BaseModel):
|
93
97
|
pass
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.21
|
3
|
+
Version: 0.0.22
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -21,7 +21,7 @@ Requires-Dist: oss2
|
|
21
21
|
Requires-Dist: psutil
|
22
22
|
Requires-Dist: polars
|
23
23
|
Requires-Dist: plotly
|
24
|
-
Requires-Dist: environs
|
24
|
+
Requires-Dist: environs<14.1.0
|
25
25
|
Requires-Dist: pydantic<v2
|
26
26
|
Requires-Dist: scikit-learn
|
27
27
|
Requires-Dist: pymilvus
|
@@ -73,6 +73,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
|
|
73
73
|
Provides-Extra: aliyun-opensearch
|
74
74
|
Requires-Dist: alibabacloud_ha3engine_vector; extra == "aliyun-opensearch"
|
75
75
|
Requires-Dist: alibabacloud_searchengine20211025; extra == "aliyun-opensearch"
|
76
|
+
Provides-Extra: mongodb
|
77
|
+
Requires-Dist: pymongo; extra == "mongodb"
|
76
78
|
|
77
79
|
# VectorDBBench: A Benchmark Tool for VectorDB
|
78
80
|
|
@@ -89,6 +91,8 @@ Closely mimicking real-world production environments, we've set up diverse testi
|
|
89
91
|
|
90
92
|
Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
|
91
93
|
|
94
|
+
VectorDBBench is sponsored by Zilliz, the leading open-source vector database company behind Milvus. Choose smarter with VectorDBBench — start your free test on [Zilliz Cloud](https://zilliz.com/) today!
|
95
|
+
|
92
96
|
**Leaderboard:** https://zilliz.com/benchmark
|
93
97
|
## Quick Start
|
94
98
|
### Prerequirement
|
@@ -128,6 +132,7 @@ All the database client supported
|
|
128
132
|
| chromadb | `pip install vectordb-bench[chromadb]` |
|
129
133
|
| awsopensearch | `pip install vectordb-bench[opensearch]` |
|
130
134
|
| aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
|
135
|
+
| mongodb | `pip install vectordb-bench[mongodb]` |
|
131
136
|
|
132
137
|
### Run
|
133
138
|
|
@@ -228,6 +233,47 @@ Options:
|
|
228
233
|
with-gt]
|
229
234
|
--help Show this message and exit.
|
230
235
|
```
|
236
|
+
|
237
|
+
### Run awsopensearch from command line
|
238
|
+
|
239
|
+
```shell
|
240
|
+
vectordbbench awsopensearch --db-label awsopensearch \
|
241
|
+
--m 16 --ef-construction 256 \
|
242
|
+
--host search-vector-db-prod-h4f6m4of6x7yp2rz7gdmots7w4.us-west-2.es.amazonaws.com --port 443 \
|
243
|
+
--user vector --password '<password>' \
|
244
|
+
--case-type Performance1536D5M --num-insert-workers 10 \
|
245
|
+
--skip-load --num-concurrency 75
|
246
|
+
```
|
247
|
+
|
248
|
+
To list the options for awsopensearch, execute `vectordbbench awsopensearch --help`
|
249
|
+
|
250
|
+
```text
|
251
|
+
$ vectordbbench awsopensearch --help
|
252
|
+
Usage: vectordbbench awsopensearch [OPTIONS]
|
253
|
+
|
254
|
+
Options:
|
255
|
+
# Sharding and Replication
|
256
|
+
--number-of-shards INTEGER Number of primary shards for the index
|
257
|
+
--number-of-replicas INTEGER Number of replica copies for each primary
|
258
|
+
shard
|
259
|
+
# Indexing Performance
|
260
|
+
--index-thread-qty INTEGER Thread count for native engine indexing
|
261
|
+
--index-thread-qty-during-force-merge INTEGER
|
262
|
+
Thread count during force merge operations
|
263
|
+
--number-of-indexing-clients INTEGER
|
264
|
+
Number of concurrent indexing clients
|
265
|
+
# Index Management
|
266
|
+
--number-of-segments INTEGER Target number of segments after merging
|
267
|
+
--refresh-interval TEXT How often to make new data available for
|
268
|
+
search
|
269
|
+
--force-merge-enabled BOOLEAN Whether to perform force merge operation
|
270
|
+
--flush-threshold-size TEXT Size threshold for flushing the transaction
|
271
|
+
log
|
272
|
+
# Memory Management
|
273
|
+
--cb-threshold TEXT k-NN Memory circuit breaker threshold
|
274
|
+
|
275
|
+
--help Show this message and exit.
```
|
276
|
+
|
231
277
|
#### Using a configuration file.
|
232
278
|
|
233
279
|
The vectordbbench command can optionally read some or all of the options from a YAML-formatted configuration file.
|
@@ -394,6 +440,13 @@ We have strict requirements for the data set format, please follow them.
|
|
394
440
|
- `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
|
395
441
|
- Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
396
442
|
- Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
443
|
+
- We recommend limiting the number of test query vectors, for example to 1,000.
|
444
|
+
When conducting concurrent query tests, VectorDBBench creates a large number of processes.
|
445
|
+
To minimize additional communication overhead during testing,
|
446
|
+
we prepare a complete set of test queries for each process, allowing them to run independently.
|
447
|
+
However, this means that as the number of concurrent processes increases,
|
448
|
+
the number of copied query vectors also increases significantly,
|
449
|
+
which can place substantial pressure on memory resources.
|
397
450
|
- Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
|
398
451
|
|
399
452
|
- `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
|
@@ -2,9 +2,9 @@ vectordb_bench/__init__.py,sha256=d5psAfISw9F6PFL2xPlSYUKKFDw7ifQm7g3LWC8_yUA,23
|
|
2
2
|
vectordb_bench/__main__.py,sha256=cyYbVSU-zA1AgzneGKcRRuzR4ftRDr9sIi9Ei9NZnhI,858
|
3
3
|
vectordb_bench/base.py,sha256=AgavIF0P9ku_RmCRk1KKziba-wI4ZpA2aJvjJzNhRSs,129
|
4
4
|
vectordb_bench/interface.py,sha256=XaCjTgUeI17uVjsgOauPeVlkvnkuCyQOWyOaWhrgCt8,9811
|
5
|
-
vectordb_bench/log_util.py,sha256=
|
5
|
+
vectordb_bench/log_util.py,sha256=wDNaU_JBBOfKi_Z4vq7LDa0kOlLjoNNzDX3VZQn_Dxo,3239
|
6
6
|
vectordb_bench/metric.py,sha256=pj-AxQHyIRHTaJY-wTIkTbC6TqEqMzt3kcEmMWEv71w,2063
|
7
|
-
vectordb_bench/models.py,sha256=
|
7
|
+
vectordb_bench/models.py,sha256=1G6GDTtP0TbYT2Qhw_cJ84kx_Wkn4gq6V_EUVpCJW3Y,11150
|
8
8
|
vectordb_bench/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
9
|
vectordb_bench/backend/assembler.py,sha256=6GInRT7yBgfTaIPmo-XMkYX4pA8PJQmjMQInynwaunE,2047
|
10
10
|
vectordb_bench/backend/cases.py,sha256=obDdY6g3p9Z2fog7qDwLLDuRMwo3LGQKMHsP66QZd2M,16296
|
@@ -13,7 +13,7 @@ vectordb_bench/backend/dataset.py,sha256=V4OKPt23v0kmdvgJwDr_R2fLJv3lXLZEii992cE
|
|
13
13
|
vectordb_bench/backend/result_collector.py,sha256=mpROVdZ-HChKBVyMV5TZ5v7YGRb69bvfT7Gezn5F5sY,819
|
14
14
|
vectordb_bench/backend/task_runner.py,sha256=vlaXB0_25-G9w1Lj-F0SrvJzhXT7ceDWGIb2aKRXukU,11488
|
15
15
|
vectordb_bench/backend/utils.py,sha256=R6THuJdZhiQYSSJTqv0Uegl2B20taV_QjwvFrun2yxE,1949
|
16
|
-
vectordb_bench/backend/clients/__init__.py,sha256=
|
16
|
+
vectordb_bench/backend/clients/__init__.py,sha256=ncF4H4foep04EdpzQHQ6zqDxTL1cSvUmglt-v2E966g,8595
|
17
17
|
vectordb_bench/backend/clients/api.py,sha256=uQaX_FiMFlD3z_91awUzB-qtBkvyDsMKE8ks5bBgJSY,6233
|
18
18
|
vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py,sha256=7yPYaWoHeHNxDMtpReGXsdEPFD1e4vQblFor7TmLq5o,770
|
19
19
|
vectordb_bench/backend/clients/aliyun_elasticsearch/config.py,sha256=d9RCgfCgauKvy6z9ig_wBormgwiGtkh8POyoHloHnJA,505
|
@@ -22,9 +22,9 @@ vectordb_bench/backend/clients/aliyun_opensearch/config.py,sha256=KSiuRu-p7oL2PE
|
|
22
22
|
vectordb_bench/backend/clients/alloydb/alloydb.py,sha256=E24hxCUgpBCRiScdcS_iBk8n0wngUgVg8qujOWiUhw0,13009
|
23
23
|
vectordb_bench/backend/clients/alloydb/cli.py,sha256=G6Q0WApoDXDG_pqmK2lEKFIvKB8qAsZFPM8TfsURydE,5086
|
24
24
|
vectordb_bench/backend/clients/alloydb/config.py,sha256=PJs2wIJqwcG6UJ3T8R7Pi3xTMBfxTZiNkcWyhtHv5dc,5313
|
25
|
-
vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py,sha256=
|
26
|
-
vectordb_bench/backend/clients/aws_opensearch/cli.py,sha256=
|
27
|
-
vectordb_bench/backend/clients/aws_opensearch/config.py,sha256=
|
25
|
+
vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py,sha256=iRtPmHZoVTpQ-3Q90nE70zy_XsklGlSSNgBOgeAtVzU,10047
|
26
|
+
vectordb_bench/backend/clients/aws_opensearch/cli.py,sha256=G086STCoaTBkz2J5Qt42bnyhmcYbhl6XxTaLfeirkXQ,4065
|
27
|
+
vectordb_bench/backend/clients/aws_opensearch/config.py,sha256=9meXQUOVFlk3UOAhvBhaghNm7TasDsA6-fXOY8C9gzU,2295
|
28
28
|
vectordb_bench/backend/clients/aws_opensearch/run.py,sha256=Ry5aAlielWjq0hx7LnbdShfOwzZhz3Gq9WYu5U43x9s,5001
|
29
29
|
vectordb_bench/backend/clients/chroma/chroma.py,sha256=TGsmAnG5I3bbIjJ5L7ktke6fD8lOrx56Wt2tMCb3dY8,3609
|
30
30
|
vectordb_bench/backend/clients/chroma/config.py,sha256=8nXpPdecQ5HrNqcsQwAVgacSz6uLgI-BI7v4tB8CeDk,347
|
@@ -36,6 +36,8 @@ vectordb_bench/backend/clients/memorydb/memorydb.py,sha256=WrZhDYJqpwN173sk2lmPn
|
|
36
36
|
vectordb_bench/backend/clients/milvus/cli.py,sha256=xGvYYKOAs32vz78oB5Ks_xnWIMzcl_f7TPEPRk94FeQ,8895
|
37
37
|
vectordb_bench/backend/clients/milvus/config.py,sha256=oFZ5VG5UHws161M1cYmMr2b9NSEoqwwst998T59QGQo,7520
|
38
38
|
vectordb_bench/backend/clients/milvus/milvus.py,sha256=xdVVjMnBzD5KGJ7iUB-B3SuTL4JDW1UD15QBevExMLw,6862
|
39
|
+
vectordb_bench/backend/clients/mongodb/config.py,sha256=7DZCh0bjPiqJW2luPypfpNeGfvKxVC4mdHLqgcjF1hA,1745
|
40
|
+
vectordb_bench/backend/clients/mongodb/mongodb.py,sha256=ts2gpAzUTarpkfMFnM5ANi6T-xvcjS8kc4-apPt9jug,7225
|
39
41
|
vectordb_bench/backend/clients/pgdiskann/cli.py,sha256=o5ddAp1Be2TOnm8Wh9IyIWUxdnw5N6v92Ms1s6CEwBo,3135
|
40
42
|
vectordb_bench/backend/clients/pgdiskann/config.py,sha256=DBsVgLn4edl-irSlP_GV7KW-8jFemns_ujR_CuVnQtE,4412
|
41
43
|
vectordb_bench/backend/clients/pgdiskann/pgdiskann.py,sha256=Z8K74Y6uMi6q8gnnD68doBxc5pWBSpRnNLDhlifseH4,12299
|
@@ -88,7 +90,7 @@ vectordb_bench/frontend/components/check_results/priceTable.py,sha256=K3NmlNKAb-
|
|
88
90
|
vectordb_bench/frontend/components/check_results/stPageConfig.py,sha256=czkqr9NC3UQAxiz8KSCZC8cPmgSnFUhI2lOLHXfuMxo,432
|
89
91
|
vectordb_bench/frontend/components/concurrent/charts.py,sha256=00WI8wxIdHAhnpmFJLd03n5U3LbowmeY4swVbGNzyYg,2874
|
90
92
|
vectordb_bench/frontend/components/custom/displayCustomCase.py,sha256=aIWKFm13-EPG2XlJ3PWc2znR6q8A5FR93D5ZkGGncrM,1641
|
91
|
-
vectordb_bench/frontend/components/custom/displaypPrams.py,sha256=
|
93
|
+
vectordb_bench/frontend/components/custom/displaypPrams.py,sha256=mwm74_86YYRbpJ1Hz2Dba0eKvyzkK0DM7uhjBDFoElU,1910
|
92
94
|
vectordb_bench/frontend/components/custom/getCustomConfig.py,sha256=tSPI2DPJSNxlArLcO5Kf9nhpIBc0_YE2QD9-1cbaLus,1031
|
93
95
|
vectordb_bench/frontend/components/custom/initStyle.py,sha256=ortsoUNqH-vVq9ECiw80PnBEcIaUwxR1AQ65DSkBhGs,434
|
94
96
|
vectordb_bench/frontend/components/get_results/saveAsImage.py,sha256=POaFiwKoCGqrY-zhanWC7-tubE64bV_JjqI4lgIuMts,1459
|
@@ -99,9 +101,9 @@ vectordb_bench/frontend/components/run_test/dbSelector.py,sha256=hzMEIL1DzvpP8xk
|
|
99
101
|
vectordb_bench/frontend/components/run_test/generateTasks.py,sha256=3y8NHtWJMNqoP2SvoWuR7kj84g0OEg68IULebimzz7E,741
|
100
102
|
vectordb_bench/frontend/components/run_test/hideSidebar.py,sha256=vb5kzIMmbMqWX67qFEHek21X4sGO_tPyn_uPqUEtp3Q,234
|
101
103
|
vectordb_bench/frontend/components/run_test/initStyle.py,sha256=osPUgfFfH7rRlVNHSMumvmZxvKWlLxmZiNqgnMiUJEU,723
|
102
|
-
vectordb_bench/frontend/components/run_test/submitTask.py,sha256=
|
104
|
+
vectordb_bench/frontend/components/run_test/submitTask.py,sha256=VZjkopkCBNhqLwGqsoM0hbPEeF6Q5UOQcdFUaegerxc,4094
|
103
105
|
vectordb_bench/frontend/components/tables/data.py,sha256=5DdnC64BB7Aj2z9acht2atsPB4NabzQCZKALfIUnqtQ,1233
|
104
|
-
vectordb_bench/frontend/config/dbCaseConfigs.py,sha256=
|
106
|
+
vectordb_bench/frontend/config/dbCaseConfigs.py,sha256=vHYdD3Tzi4NRD5ckJfkciIqZcMZY_Qslm_sUolKLxro,38597
|
105
107
|
vectordb_bench/frontend/config/dbPrices.py,sha256=10aBKjVcEg8y7TPSda28opmBM1KmXNrvbU9WM_BsZcE,176
|
106
108
|
vectordb_bench/frontend/config/styles.py,sha256=E2PmwmiewxBKJJ59hQ4ZXatqg8QTN-Z53JlsvWMHM2M,2291
|
107
109
|
vectordb_bench/frontend/pages/concurrent.py,sha256=bvoSafRSIsRzBQkI3uBwwrdg8jnhRUQG-epZbrJhGiE,2082
|
@@ -127,9 +129,9 @@ vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json
|
|
127
129
|
vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json,sha256=-Mdm4By65XDRCrmVOCF8yQXjcZtH4Xo4shcjoDoBUKU,18293
|
128
130
|
vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json,sha256=77XlHT5zM_K7mG5HfDQKwXZnSCuR37VUbt6-P3J_amI,15737
|
129
131
|
vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json,sha256=TualfJ0664Hs-vdIW68bdkqAEYyzotXmu2P0yIN-GHk,42526
|
130
|
-
vectordb_bench-0.0.
|
131
|
-
vectordb_bench-0.0.
|
132
|
-
vectordb_bench-0.0.
|
133
|
-
vectordb_bench-0.0.
|
134
|
-
vectordb_bench-0.0.
|
135
|
-
vectordb_bench-0.0.
|
132
|
+
vectordb_bench-0.0.22.dist-info/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
|
133
|
+
vectordb_bench-0.0.22.dist-info/METADATA,sha256=fSJ2cTKb1oLQcXYVQc8cTHoMkEqOelusy0a_VEPuvPo,37166
|
134
|
+
vectordb_bench-0.0.22.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
135
|
+
vectordb_bench-0.0.22.dist-info/entry_points.txt,sha256=Qzw6gVx96ui8esG21H6yHsI6nboEohRmV424TYhQNrA,113
|
136
|
+
vectordb_bench-0.0.22.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
|
137
|
+
vectordb_bench-0.0.22.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|