vectordb-bench 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. vectordb_bench/__init__.py +14 -27
  2. vectordb_bench/backend/assembler.py +19 -6
  3. vectordb_bench/backend/cases.py +186 -23
  4. vectordb_bench/backend/clients/__init__.py +32 -0
  5. vectordb_bench/backend/clients/api.py +22 -1
  6. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +249 -43
  7. vectordb_bench/backend/clients/aws_opensearch/cli.py +51 -21
  8. vectordb_bench/backend/clients/aws_opensearch/config.py +58 -16
  9. vectordb_bench/backend/clients/chroma/chroma.py +6 -2
  10. vectordb_bench/backend/clients/elastic_cloud/config.py +19 -1
  11. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +133 -45
  12. vectordb_bench/backend/clients/lancedb/cli.py +62 -8
  13. vectordb_bench/backend/clients/lancedb/config.py +14 -1
  14. vectordb_bench/backend/clients/lancedb/lancedb.py +21 -9
  15. vectordb_bench/backend/clients/memorydb/memorydb.py +2 -2
  16. vectordb_bench/backend/clients/milvus/cli.py +30 -9
  17. vectordb_bench/backend/clients/milvus/config.py +3 -0
  18. vectordb_bench/backend/clients/milvus/milvus.py +81 -23
  19. vectordb_bench/backend/clients/oceanbase/cli.py +100 -0
  20. vectordb_bench/backend/clients/oceanbase/config.py +125 -0
  21. vectordb_bench/backend/clients/oceanbase/oceanbase.py +215 -0
  22. vectordb_bench/backend/clients/pinecone/pinecone.py +39 -25
  23. vectordb_bench/backend/clients/qdrant_cloud/config.py +59 -3
  24. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +100 -33
  25. vectordb_bench/backend/clients/qdrant_local/cli.py +60 -0
  26. vectordb_bench/backend/clients/qdrant_local/config.py +47 -0
  27. vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +232 -0
  28. vectordb_bench/backend/clients/weaviate_cloud/cli.py +29 -3
  29. vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -0
  30. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +5 -0
  31. vectordb_bench/backend/dataset.py +143 -27
  32. vectordb_bench/backend/filter.py +76 -0
  33. vectordb_bench/backend/runner/__init__.py +3 -3
  34. vectordb_bench/backend/runner/mp_runner.py +52 -39
  35. vectordb_bench/backend/runner/rate_runner.py +68 -52
  36. vectordb_bench/backend/runner/read_write_runner.py +125 -68
  37. vectordb_bench/backend/runner/serial_runner.py +56 -23
  38. vectordb_bench/backend/task_runner.py +48 -20
  39. vectordb_bench/cli/batch_cli.py +121 -0
  40. vectordb_bench/cli/cli.py +59 -1
  41. vectordb_bench/cli/vectordbbench.py +7 -0
  42. vectordb_bench/config-files/batch_sample_config.yml +17 -0
  43. vectordb_bench/frontend/components/check_results/data.py +16 -11
  44. vectordb_bench/frontend/components/check_results/filters.py +53 -25
  45. vectordb_bench/frontend/components/check_results/headerIcon.py +16 -13
  46. vectordb_bench/frontend/components/check_results/nav.py +20 -0
  47. vectordb_bench/frontend/components/custom/displayCustomCase.py +43 -8
  48. vectordb_bench/frontend/components/custom/displaypPrams.py +10 -5
  49. vectordb_bench/frontend/components/custom/getCustomConfig.py +10 -0
  50. vectordb_bench/frontend/components/label_filter/charts.py +60 -0
  51. vectordb_bench/frontend/components/run_test/caseSelector.py +48 -52
  52. vectordb_bench/frontend/components/run_test/dbSelector.py +9 -5
  53. vectordb_bench/frontend/components/run_test/inputWidget.py +48 -0
  54. vectordb_bench/frontend/components/run_test/submitTask.py +3 -1
  55. vectordb_bench/frontend/components/streaming/charts.py +253 -0
  56. vectordb_bench/frontend/components/streaming/data.py +62 -0
  57. vectordb_bench/frontend/components/tables/data.py +1 -1
  58. vectordb_bench/frontend/components/welcome/explainPrams.py +66 -0
  59. vectordb_bench/frontend/components/welcome/pagestyle.py +106 -0
  60. vectordb_bench/frontend/components/welcome/welcomePrams.py +147 -0
  61. vectordb_bench/frontend/config/dbCaseConfigs.py +420 -41
  62. vectordb_bench/frontend/config/styles.py +32 -2
  63. vectordb_bench/frontend/pages/concurrent.py +5 -1
  64. vectordb_bench/frontend/pages/custom.py +4 -0
  65. vectordb_bench/frontend/pages/label_filter.py +56 -0
  66. vectordb_bench/frontend/pages/quries_per_dollar.py +5 -1
  67. vectordb_bench/frontend/pages/results.py +60 -0
  68. vectordb_bench/frontend/pages/run_test.py +3 -3
  69. vectordb_bench/frontend/pages/streaming.py +135 -0
  70. vectordb_bench/frontend/pages/tables.py +4 -0
  71. vectordb_bench/frontend/vdb_benchmark.py +16 -41
  72. vectordb_bench/interface.py +6 -2
  73. vectordb_bench/metric.py +15 -1
  74. vectordb_bench/models.py +38 -11
  75. vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +5890 -0
  76. vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +6138 -0
  77. vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +7319 -0
  78. vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +2365 -0
  79. vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +3556 -0
  80. vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +6290 -0
  81. vectordb_bench/results/dbPrices.json +12 -4
  82. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/METADATA +131 -32
  83. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/RECORD +87 -65
  84. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/WHEEL +1 -1
  85. vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -791
  86. vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -679
  87. vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -1352
  88. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/entry_points.txt +0 -0
  89. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/licenses/LICENSE +0 -0
  90. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,60 @@
1
+ from typing import Annotated, Unpack
2
+
3
+ import click
4
+ from pydantic import SecretStr
5
+
6
+ from vectordb_bench.backend.clients import DB
7
+ from vectordb_bench.cli.cli import (
8
+ CommonTypedDict,
9
+ cli,
10
+ click_parameter_decorators_from_typed_dict,
11
+ run,
12
+ )
13
+
14
+ DBTYPE = DB.QdrantLocal
15
+
16
+
17
class QdrantLocalTypedDict(CommonTypedDict):
    """Options accepted by the ``QdrantLocal`` command, on top of ``CommonTypedDict``.

    Every key is annotated with the ``click.option`` that supplies its value.
    """

    # Address of the Qdrant server; the only mandatory option declared here.
    url: Annotated[str, click.option("--url", required=True, type=str, help="Qdrant url")]

    # Forwarded to the index config as ``on_disk``.
    on_disk: Annotated[
        bool,
        click.option(
            "--on-disk",
            default=False,
            type=bool,
            help="Store the vectors and the HNSW index on disk",
        ),
    ]

    # HNSW ``m`` parameter.
    m: Annotated[
        int,
        click.option(
            "--m",
            default=16,
            type=int,
            help="HNSW index parameter m, set 0 to disable the index",
        ),
    ]

    # HNSW ``ef_construct`` parameter.
    ef_construct: Annotated[
        int,
        click.option(
            "--ef-construct",
            default=200,
            type=int,
            help="HNSW index parameter ef_construct",
        ),
    ]

    # Search-time ``hnsw_ef``; 0 means it is left out of the search parameters.
    hnsw_ef: Annotated[
        int,
        click.option(
            "--hnsw-ef",
            default=0,
            type=int,
            help="HNSW index parameter hnsw_ef, set 0 to use ef_construct for search",
        ),
    ]
43
+
44
+
45
@cli.command()
@click_parameter_decorators_from_typed_dict(QdrantLocalTypedDict)
def QdrantLocal(**parameters: Unpack[QdrantLocalTypedDict]):
    # The config classes are imported inside the command body rather than at module level.
    from .config import QdrantLocalConfig, QdrantLocalIndexConfig

    # Connection settings: only the server URL, wrapped as a secret.
    connection = QdrantLocalConfig(url=SecretStr(parameters["url"]))

    # Index/search settings are copied one-to-one from the parsed options.
    index_keys = ("on_disk", "m", "ef_construct", "hnsw_ef")
    index_settings = QdrantLocalIndexConfig(**{key: parameters[key] for key in index_keys})

    # The full option dict is still forwarded so `run` sees the common options.
    run(
        db=DBTYPE,
        db_config=connection,
        db_case_config=index_settings,
        **parameters,
    )
@@ -0,0 +1,47 @@
1
+ from pydantic import BaseModel, SecretStr
2
+
3
+ from ..api import DBCaseConfig, DBConfig, MetricType
4
+
5
+
6
class QdrantLocalConfig(DBConfig):
    """Connection settings for the QdrantLocal client: a single secret URL."""

    # Kept as a SecretStr; only unwrapped in `to_dict`.
    url: SecretStr

    def to_dict(self) -> dict:
        """Return the settings as a plain dict with the URL unwrapped."""
        plain_url = self.url.get_secret_value()
        return dict(url=plain_url)
13
+
14
+
15
class QdrantLocalIndexConfig(BaseModel, DBCaseConfig):
    """Index-build and search settings for the QdrantLocal client."""

    # Distance metric; anything other than L2 / IP (including None) maps to "Cosine".
    metric_type: MetricType | None = None
    # HNSW build parameters, required.
    m: int
    ef_construct: int
    # Search-time hnsw_ef; 0 means "do not put it in the search parameters".
    hnsw_ef: int | None = 0
    # Passed through to the index parameters as "on_disk".
    on_disk: bool | None = False

    def parse_metric(self) -> str:
        """Translate ``metric_type`` into the distance name used in the index parameters."""
        metric = self.metric_type
        if metric == MetricType.L2:
            return "Euclid"
        return "Dot" if metric == MetricType.IP else "Cosine"

    def index_param(self) -> dict:
        """Build the dict of collection/index creation parameters."""
        params = {"distance": self.parse_metric()}
        params["m"] = self.m
        params["ef_construct"] = self.ef_construct
        params["on_disk"] = self.on_disk
        return params

    def search_param(self) -> dict:
        """Build the dict of search parameters; ``hnsw_ef`` is included unless it equals 0."""
        # "exact": False forces approximate (ANN) search.
        base = {"exact": False}
        if self.hnsw_ef == 0:
            return base
        base["hnsw_ef"] = self.hnsw_ef
        return base
@@ -0,0 +1,232 @@
1
+ """Wrapper around the Qdrant client that implements the VectorDB benchmark interface."""
2
+
3
+ import logging
4
+ import time
5
+ from collections.abc import Iterable
6
+ from contextlib import contextmanager
7
+
8
+ from qdrant_client import QdrantClient
9
+ from qdrant_client.http.models import (
10
+ Batch,
11
+ CollectionStatus,
12
+ FieldCondition,
13
+ Filter,
14
+ HnswConfigDiff,
15
+ OptimizersConfigDiff,
16
+ PayloadSchemaType,
17
+ Range,
18
+ SearchParams,
19
+ VectorParams,
20
+ )
21
+
22
+ from ..api import VectorDB
23
+ from .config import QdrantLocalIndexConfig
24
+
25
+ log = logging.getLogger(__name__)
26
+
27
+ SECONDS_WAITING_FOR_INDEXING_API_CALL = 5
28
+ QDRANT_BATCH_SIZE = 100
29
+
30
+
31
def qdrant_collection_exists(client: QdrantClient, collection_name: str) -> bool:
    """Report whether ``collection_name`` can be fetched through ``client``.

    Any exception raised by ``client.get_collection`` is interpreted as
    "the collection does not exist"; nothing is re-raised.
    """
    try:
        client.get_collection(collection_name)
    except Exception:
        return False
    return True
40
+
41
+
42
class QdrantLocal(VectorDB):
    """VectorDB benchmark client backed by ``qdrant_client.QdrantClient``.

    The constructor only prepares the collection (optionally dropping an old
    one) with a short-lived client. All data-path methods (``optimize``,
    ``insert_embeddings``, ``search_embedding``) require an active connection
    opened with ``with self.init():``.
    """

    def __init__(
        self,
        dim: int,
        db_config: dict,
        db_case_config: QdrantLocalIndexConfig,
        collection_name: str = "QdrantLocalCollection",
        drop_old: bool = False,
        name: str = "QdrantLocal",
        **kwargs,
    ):
        """Store the configuration and make sure the collection exists.

        Args:
            dim: vector dimension used when the collection is created.
            db_config: keyword arguments passed verbatim to ``QdrantClient``.
            db_case_config: index/search settings for this run.
            collection_name: name of the collection to use.
            drop_old: when true, an existing collection of that name is
                deleted before (re)creation.
            name: label used in log messages.
            kwargs: accepted for interface compatibility; unused here.
        """
        self.name = name
        self.db_config = db_config
        self.case_config = db_case_config
        self.search_parameter = self.case_config.search_param()
        self.collection_name = collection_name
        # The long-lived client is only created inside `init()`.
        self.client = None

        self._primary_field = "pk"
        self._vector_field = "vector"

        # Temporary client used only for collection setup.
        client = QdrantClient(**self.db_config)

        # Lets just print the parameters here for double check
        log.info(f"Case config: {self.case_config.index_param()}")
        log.info(f"Search parameter: {self.search_parameter}")

        if drop_old and qdrant_collection_exists(client, self.collection_name):
            log.info(f"{self.name} client drop_old collection: {self.collection_name}")
            client.delete_collection(self.collection_name)

        if not qdrant_collection_exists(client, self.collection_name):
            log.info(f"{self.name} create collection: {self.collection_name}")
            self._create_collection(dim, client)

        client = None

    @contextmanager
    def init(self):
        """Open a client connection for the duration of the ``with`` block.

        Examples:
            >>> with self.init():
            >>>     self.insert_embeddings()
            >>>     self.search_embedding()
        """
        # create connection
        self.client = QdrantClient(**self.db_config)
        try:
            yield
        finally:
            # Always return to the "no connection" state set in __init__, even
            # when the body raised, so the other methods' `self.client`
            # assertions keep working afterwards.
            self.client = None

    def _create_collection(self, dim: int, qdrant_client: QdrantClient):
        """Create the collection and a payload index on the primary field.

        An error whose text contains "already exists!" is treated as success;
        any other error is logged and re-raised.
        """
        # Build the parameter dict once instead of once per lookup.
        params = self.case_config.index_param()

        log.info(f"Create collection: {self.collection_name}")
        log.info(
            f"Index parameters: m={params['m']}, "
            f"ef_construct={params['ef_construct']}, "
            f"on_disk={params['on_disk']}"
        )

        # If the on_disk is true, we enable both on disk index and vectors.
        try:
            qdrant_client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(
                    size=dim,
                    distance=params["distance"],
                    on_disk=params["on_disk"],
                ),
                hnsw_config=HnswConfigDiff(
                    m=params["m"],
                    ef_construct=params["ef_construct"],
                    on_disk=params["on_disk"],
                ),
            )

            qdrant_client.create_payload_index(
                collection_name=self.collection_name,
                field_name=self._primary_field,
                field_schema=PayloadSchemaType.INTEGER,
            )

        except Exception as e:
            if "already exists!" in str(e):
                return
            log.warning(f"Failed to create collection: {self.collection_name} error: {e}")
            raise e from None

    def optimize(self, data_size: int | None = None):
        """Block until the collection status is GREEN.

        Polls every ``SECONDS_WAITING_FOR_INDEXING_API_CALL`` seconds. Any
        error from the client is logged and re-raised. ``data_size`` is unused.
        """
        assert self.client, "Please call self.init() before"
        # wait for vectors to be fully indexed
        try:
            while True:
                # Sleep first, then fetch, so the status we test is fresh
                # rather than a snapshot taken before the wait.
                time.sleep(SECONDS_WAITING_FOR_INDEXING_API_CALL)
                info = self.client.get_collection(self.collection_name)
                if info.status != CollectionStatus.GREEN:
                    continue

                log.info(f"Finishing building index for collection: {self.collection_name}")
                msg = (
                    f"Stored vectors: {info.vectors_count}, Indexed vectors: {info.indexed_vectors_count}, "
                    f"Collection status: {info.status}"
                )
                log.info(msg)
                return

        except Exception as e:
            log.warning(f"QdrantLocal ready to search error: {e}")
            raise e from None

    def insert_embeddings(
        self,
        embeddings: Iterable[list[float]],
        metadata: list[int],
        **kwargs,
    ) -> tuple[int, Exception | None]:
        """Insert embeddings into the database.

        Indexing is switched off (``indexing_threshold=0``) before the upload
        and switched back on (``indexing_threshold=100``) after all batches
        succeeded. Points are upserted in batches of ``QDRANT_BATCH_SIZE``.

        Args:
            embeddings(list[list[float]]): list of embeddings; must support
                ``len`` and slicing.
            metadata(list[int]): list of metadata, used both as point ids and
                as the primary-field payload.
            kwargs: other arguments (unused).

        Returns:
            tuple[int, Exception | None]: number of embeddings actually
            inserted, and the exception that stopped the upload, if any.
        """
        assert self.client is not None
        assert len(embeddings) == len(metadata)
        insert_count = 0

        # disable indexing for quick insertion
        self.client.update_collection(
            collection_name=self.collection_name,
            optimizer_config=OptimizersConfigDiff(indexing_threshold=0),
        )
        try:
            for offset in range(0, len(embeddings), QDRANT_BATCH_SIZE):
                vectors = embeddings[offset : offset + QDRANT_BATCH_SIZE]
                ids = metadata[offset : offset + QDRANT_BATCH_SIZE]
                payloads = [{self._primary_field: v} for v in ids]
                _ = self.client.upsert(
                    collection_name=self.collection_name,
                    wait=True,
                    points=Batch(ids=ids, payloads=payloads, vectors=vectors),
                )
                # Count what was really sent: the final batch may be shorter
                # than QDRANT_BATCH_SIZE.
                insert_count += len(ids)
            # enable indexing after insertion
            self.client.update_collection(
                collection_name=self.collection_name,
                optimizer_config=OptimizersConfigDiff(indexing_threshold=100),
            )

        except Exception as e:
            log.info(f"Failed to insert data, {e}")
            return insert_count, e
        else:
            return insert_count, None

    def search_embedding(
        self,
        query: list[float],
        k: int = 100,
        filters: dict | None = None,
        timeout: int | None = None,
    ) -> list[int]:
        """Run a vector query and return the ids of the matching points.

        Should call self.init() first.

        Args:
            query: query embedding.
            k: maximum number of points to return.
            filters: when truthy, restricts results to points whose primary
                field is greater than ``filters.get("id")``.
            timeout: accepted for interface compatibility; unused here.

        Returns:
            list[int]: ids of the returned points, in the order given by the client.
        """
        assert self.client is not None

        f = None
        if filters:
            f = Filter(
                must=[
                    FieldCondition(
                        key=self._primary_field,
                        range=Range(
                            gt=filters.get("id"),
                        ),
                    ),
                ],
            )
        res = self.client.query_points(
            collection_name=self.collection_name,
            query=query,
            limit=k,
            query_filter=f,
            search_params=SearchParams(**self.search_parameter),
        ).points

        return [result.id for result in res]
@@ -15,12 +15,33 @@ from .. import DB
15
15
  class WeaviateTypedDict(CommonTypedDict):
16
16
  api_key: Annotated[
17
17
  str,
18
- click.option("--api-key", type=str, help="Weaviate api key", required=True),
18
+ click.option("--api-key", type=str, help="Weaviate api key", required=False, default=""),
19
19
  ]
20
20
  url: Annotated[
21
21
  str,
22
22
  click.option("--url", type=str, help="Weaviate url", required=True),
23
23
  ]
24
+ no_auth: Annotated[
25
+ bool,
26
+ click.option(
27
+ "--no-auth",
28
+ is_flag=True,
29
+ help="Do not use api-key, set it to true if you are using a local setup. Default is False.",
30
+ default=False,
31
+ ),
32
+ ]
33
+ m: Annotated[
34
+ int,
35
+ click.option("--m", type=int, default=16, help="HNSW index parameter m."),
36
+ ]
37
+ ef_construct: Annotated[
38
+ int,
39
+ click.option("--ef-construction", type=int, default=256, help="HNSW index parameter ef_construction"),
40
+ ]
41
+ ef: Annotated[
42
+ int,
43
+ click.option("--ef", type=int, default=256, help="HNSW index parameter ef for search"),
44
+ ]
24
45
 
25
46
 
26
47
  @cli.command()
@@ -32,9 +53,14 @@ def Weaviate(**parameters: Unpack[WeaviateTypedDict]):
32
53
  db=DB.WeaviateCloud,
33
54
  db_config=WeaviateConfig(
34
55
  db_label=parameters["db_label"],
35
- api_key=SecretStr(parameters["api_key"]),
56
+ api_key=SecretStr(parameters["api_key"]) if parameters["api_key"] != "" else SecretStr("-"),
36
57
  url=SecretStr(parameters["url"]),
58
+ no_auth=parameters["no_auth"],
59
+ ),
60
+ db_case_config=WeaviateIndexConfig(
61
+ efConstruction=parameters["ef_construction"],
62
+ maxConnections=parameters["m"],
63
+ ef=parameters["ef"],
37
64
  ),
38
- db_case_config=WeaviateIndexConfig(ef=256, efConstruction=256, maxConnections=16),
39
65
  **parameters,
40
66
  )
@@ -6,11 +6,13 @@ from ..api import DBCaseConfig, DBConfig, MetricType
6
6
  class WeaviateConfig(DBConfig):
7
7
  url: SecretStr
8
8
  api_key: SecretStr
9
+ no_auth: bool | None = False
9
10
 
10
11
  def to_dict(self) -> dict:
11
12
  return {
12
13
  "url": self.url.get_secret_value(),
13
14
  "auth_client_secret": self.api_key.get_secret_value(),
15
+ "no_auth": self.no_auth,
14
16
  }
15
17
 
16
18
 
@@ -38,6 +38,11 @@ class WeaviateCloud(VectorDB):
38
38
  self._vector_field = "vector"
39
39
  self._index_name = "vector_idx"
40
40
 
41
+ # If a local setup is used (no_auth is set), drop the API key so the client is created without authentication
42
+ if db_config["no_auth"]:
43
+ del db_config["auth_client_secret"]
44
+ del db_config["no_auth"]
45
+
41
46
  from weaviate import Client
42
47
 
43
48
  client = Client(**db_config)