vectordb-bench 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/backend/clients/pgvector/config.py +18 -10
- vectordb_bench/backend/clients/pgvector/pgvector.py +105 -78
- vectordb_bench/backend/runner/serial_runner.py +1 -1
- vectordb_bench/cli/cli.py +8 -5
- vectordb_bench/interface.py +1 -1
- {vectordb_bench-1.0.8.dist-info → vectordb_bench-1.0.9.dist-info}/METADATA +10 -4
- {vectordb_bench-1.0.8.dist-info → vectordb_bench-1.0.9.dist-info}/RECORD +11 -11
- {vectordb_bench-1.0.8.dist-info → vectordb_bench-1.0.9.dist-info}/WHEEL +0 -0
- {vectordb_bench-1.0.8.dist-info → vectordb_bench-1.0.9.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-1.0.8.dist-info → vectordb_bench-1.0.9.dist-info}/licenses/LICENSE +0 -0
- {vectordb_bench-1.0.8.dist-info → vectordb_bench-1.0.9.dist-info}/top_level.txt +0 -0
vectordb_bench/backend/clients/pgvector/config.py
CHANGED
@@ -21,21 +21,25 @@ class PgVectorConfigDict(TypedDict):
 class PgVectorConfig(DBConfig):
-    user_name: SecretStr = SecretStr("postgres")
+    user_name: SecretStr = "postgres"
     password: SecretStr
     host: str = "localhost"
     port: int = 5432
-    db_name: str
+    db_name: str = "vectordb"
+    table_name: str = "vdbbench_table_test"

     def to_dict(self) -> PgVectorConfigDict:
-        user_str = self.user_name.get_secret_value()
+        user_str = self.user_name.get_secret_value() if isinstance(self.user_name, SecretStr) else self.user_name
         pwd_str = self.password.get_secret_value()
         return {
-            "host": self.host,
-            "port": self.port,
-            "dbname": self.db_name,
-            "user": user_str,
-            "password": pwd_str,
+            "connect_config": {
+                "host": self.host,
+                "port": self.port,
+                "dbname": self.db_name,
+                "user": user_str,
+                "password": pwd_str,
+            },
+            "table_name": self.table_name,
         }
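The connection settings are now nested under `connect_config`, with `table_name` carried alongside. A minimal sketch of the resulting dict and how a caller can feed it to psycopg (host, credentials, and table values below are placeholders or the defaults visible in the diff, not anything beyond it):

```python
# Sketch only: the shape of PgVectorConfigDict in 1.0.9 and how the nested
# block maps onto psycopg.connect() keyword arguments. Values are examples.
import psycopg

db_config = {
    "connect_config": {
        "host": "localhost",
        "port": 5432,
        "dbname": "vectordb",
        "user": "postgres",
        "password": "example-password",
    },
    "table_name": "vdbbench_table_test",
}

conn = psycopg.connect(**db_config["connect_config"])  # host/port/dbname/user/password
table_name = db_config["table_name"]
```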
@@ -59,6 +63,10 @@ class PgVectorIndexConfig(BaseModel, DBCaseConfig):
     metric_type: MetricType | None = None
     create_index_before_load: bool = False
     create_index_after_load: bool = True
+    # Scan more of the index to get enough results for filter-cases.
+    # Options: "strict_order" (order by distance), "relaxed_order" (slightly out of order but better recall)
+    # See: https://github.com/pgvector/pgvector?tab=readme-ov-file#iterative-index-scans
+    iterative_scan: str = "relaxed_order"

     def parse_metric(self) -> str:
         d = {
@@ -205,7 +213,7 @@ class PgVectorIVFFlatConfig(PgVectorIndexConfig):
     }

     def session_param(self) -> PgVectorSessionCommands:
-        session_parameters = {"ivfflat.probes": self.probes}
+        session_parameters = {"ivfflat.probes": self.probes, "ivfflat.iterative_scan": self.iterative_scan}
         return {"session_options": self._optionally_build_set_options(session_parameters)}
@@ -255,7 +263,7 @@ class PgVectorHNSWConfig(PgVectorIndexConfig):
     }

     def session_param(self) -> PgVectorSessionCommands:
-        session_parameters = {"hnsw.ef_search": self.ef_search}
+        session_parameters = {"hnsw.ef_search": self.ef_search, "hnsw.iterative_scan": self.iterative_scan}
         return {"session_options": self._optionally_build_set_options(session_parameters)}
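Both index configs now forward `iterative_scan` as a per-session GUC next to their existing knobs. A hedged sketch of the SET statements such a session_parameters dict is expected to translate into (the actual SQL building is done by `_optionally_build_set_options`; the ef_search value here is just an example):

```python
# Illustration only: approximate SET commands for the HNSW session parameters
# shown above. The package builds these via _optionally_build_set_options,
# not this helper.
session_parameters = {"hnsw.ef_search": 256, "hnsw.iterative_scan": "relaxed_order"}

set_commands = [
    f"SET {name} = '{value}';" if isinstance(value, str) else f"SET {name} = {value};"
    for name, value in session_parameters.items()
]
print(set_commands)
# ["SET hnsw.ef_search = 256;", "SET hnsw.iterative_scan = 'relaxed_order';"]
```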
vectordb_bench/backend/clients/pgvector/pgvector.py
CHANGED
@@ -10,6 +10,8 @@ import psycopg
 from pgvector.psycopg import register_vector
 from psycopg import Connection, Cursor, sql

+from vectordb_bench.backend.filter import Filter, FilterOp
+
 from ..api import VectorDB
 from .config import PgVectorConfigDict, PgVectorIndexConfig
@@ -19,39 +21,46 @@ log = logging.getLogger(__name__)
 class PgVector(VectorDB):
     """Use psycopg instructions"""

+    supported_filter_types: list[FilterOp] = [
+        FilterOp.NonFilter,
+        FilterOp.NumGE,
+        FilterOp.StrEqual,
+    ]
+
     conn: psycopg.Connection[Any] | None = None
     cursor: psycopg.Cursor[Any] | None = None

-    _filtered_search: sql.Composed
-    _unfiltered_search: sql.Composed
+    _search: sql.Composed

     def __init__(
         self,
         dim: int,
         db_config: PgVectorConfigDict,
         db_case_config: PgVectorIndexConfig,
-        collection_name: str = "pg_vector_collection",
         drop_old: bool = False,
+        with_scalar_labels: bool = False,
         **kwargs,
     ):
         self.name = "PgVector"
-        self.db_config = db_config
         self.case_config = db_case_config
-        self.table_name = collection_name
+        self.table_name = db_config["table_name"]
+        self.connect_config = db_config["connect_config"]
         self.dim = dim
+        self.with_scalar_labels = with_scalar_labels

         self._index_name = "pgvector_index"
         self._primary_field = "id"
         self._vector_field = "embedding"
+        self._scalar_label_field = "label"

         # construct basic units
-        self.conn, self.cursor = self._create_connection(**self.db_config)
+        self.conn, self.cursor = self._create_connection(**self.connect_config)

         # create vector extension
         self.cursor.execute("CREATE EXTENSION IF NOT EXISTS vector")
         self.conn.commit()

-        log.info(f"{self.name} config values: {self.db_config}\n{self.case_config}")
+        log.info(f"{self.name} config values: {self.connect_config}\n{self.case_config}")
         if not any(
             (
                 self.case_config.create_index_before_load,
@@ -60,7 +69,7 @@ class PgVector(VectorDB):
         ):
             msg = (
                 f"{self.name} config must create an index using create_index_before_load or create_index_after_load"
-                f"{self.name} config values: {self.db_config}\n{self.case_config}"
+                f"{self.name} config values: {self.connect_config}\n{self.case_config}"
             )
             log.error(msg)
             raise RuntimeError(msg)
@@ -89,13 +98,13 @@ class PgVector(VectorDB):

         return conn, cursor

-    def _generate_search_query(self, filtered: bool = False) -> sql.Composed:
+    def _generate_search_query(self) -> sql.Composed:
         index_param = self.case_config.index_param()
         reranking = self.case_config.search_param()["reranking"]
         column_name = (
-            sql.SQL("binary_quantize({0})").format(sql.Identifier("embedding"))
+            sql.SQL("binary_quantize({0})").format(sql.Identifier(self._vector_field))
             if index_param["quantization_type"] == "bit" and index_param["table_quantization_type"] != "bit"
-            else sql.SQL("embedding")
+            else sql.SQL(self._vector_field)
         )
         search_vector = (
             sql.SQL("binary_quantize({0})").format(sql.Placeholder())
@@ -114,12 +123,14 @@ class PgVector(VectorDB):
                     """
                     SELECT i.id
                     FROM (
-                        SELECT id, embedding {reranking_metric_fun_op} %s::{table_quantization_type} AS distance
+                        SELECT {primary_field}, {vector_field} {reranking_metric_fun_op} %s::{table_quantization_type} AS distance
                         FROM public.{table_name} {where_clause}
                         ORDER BY {column_name}::{quantization_type}({dim})
-                    """,
+                    """, # noqa: E501
                 ).format(
                     table_name=sql.Identifier(self.table_name),
+                    primary_field=sql.Identifier(self._primary_field),
+                    vector_field=sql.Identifier(self._vector_field),
                     column_name=column_name,
                     reranking_metric_fun_op=sql.SQL(
                         self.case_config.search_param()["reranking_metric_fun_op"],
@@ -128,7 +139,7 @@ class PgVector(VectorDB):
                     table_quantization_type=sql.SQL(index_param["table_quantization_type"]),
                     quantization_type=sql.SQL(index_param["quantization_type"]),
                     dim=sql.Literal(self.dim),
-                    where_clause=sql.SQL(
+                    where_clause=sql.SQL(self.where_clause),
                 ),
                 sql.SQL(self.case_config.search_param()["metric_fun_op"]),
                 sql.SQL(
@@ -154,15 +165,16 @@ class PgVector(VectorDB):
                 [
                     sql.SQL(
                         """
-                        SELECT id FROM public.{table_name}
+                        SELECT {primary_field} FROM public.{table_name}
                         {where_clause} ORDER BY {column_name}::{quantization_type}({dim})
                         """,
                     ).format(
                         table_name=sql.Identifier(self.table_name),
+                        primary_field=sql.Identifier(self._primary_field),
                         column_name=column_name,
                         quantization_type=sql.SQL(index_param["quantization_type"]),
                         dim=sql.Literal(self.dim),
-                        where_clause=sql.SQL(
+                        where_clause=sql.SQL(self.where_clause),
                     ),
                     sql.SQL(self.case_config.search_param()["metric_fun_op"]),
                     sql.SQL(" {search_vector}::{quantization_type}({dim}) LIMIT %s::int").format(
@@ -176,10 +188,12 @@ class PgVector(VectorDB):
             search_query = sql.Composed(
                 [
                     sql.SQL(
-                        "SELECT id FROM public.{table_name} {where_clause} ORDER BY embedding",
+                        "SELECT {primary_field} FROM public.{table_name} {where_clause} ORDER BY {vector_field}",
                     ).format(
                         table_name=sql.Identifier(self.table_name),
-
+                        primary_field=sql.Identifier(self._primary_field),
+                        vector_field=sql.Identifier(self._vector_field),
+                        where_clause=sql.SQL(self.where_clause),
                     ),
                     sql.SQL(self.case_config.search_param()["metric_fun_op"]),
                     sql.SQL(" {search_vector}::{quantization_type}({dim}) LIMIT %s::int").format(
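With the column names parameterized and the WHERE clause injected from `self.where_clause`, the non-reranked query ends up with a single shape whether or not a filter is active. A rough, hand-written illustration of what it renders to for an HNSW index with cosine distance (the operator, dimension, and filter value are assumptions for the example, not the package's literal output):

```python
# Rough illustration of the rendered search SQL; the real statement is composed
# with psycopg's sql module and bound parameters, this is only the shape.
primary_field = "id"
vector_field = "embedding"
table_name = "vdbbench_table_test"
where_clause = "WHERE id >= 500000"  # e.g. an int filter; empty string when unfiltered

query = (
    f'SELECT "{primary_field}" FROM public."{table_name}" {where_clause} '
    f'ORDER BY "{vector_field}" <=> %s::vector(768) LIMIT %s::int'
)
# SELECT "id" FROM public."vdbbench_table_test" WHERE id >= 500000
# ORDER BY "embedding" <=> %s::vector(768) LIMIT %s::int
```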
@@ -201,7 +215,7 @@ class PgVector(VectorDB):
         >>> self.search_embedding()
         """

-        self.conn, self.cursor = self._create_connection(**self.db_config)
+        self.conn, self.cursor = self._create_connection(**self.connect_config)

         # index configuration may have commands defined that we should set during each client session
         session_options: Sequence[dict[str, Any]] = self.case_config.session_param()["session_options"]
@@ -216,9 +230,6 @@ class PgVector(VectorDB):
                 self.cursor.execute(command)
             self.conn.commit()

-        self._filtered_search = self._generate_search_query(filtered=True)
-        self._unfiltered_search = self._generate_search_query()
-
         try:
             yield
         finally:
@@ -274,7 +285,7 @@ class PgVector(VectorDB):
             )
             self.cursor.execute(
                 sql.SQL("ALTER USER {} SET maintenance_work_mem TO {};").format(
-                    sql.Identifier(self.db_config["user"]),
+                    sql.Identifier(self.connect_config["user"]),
                     index_param["maintenance_work_mem"],
                 ),
             )
@@ -288,7 +299,7 @@ class PgVector(VectorDB):
             )
             self.cursor.execute(
                 sql.SQL("ALTER USER {} SET max_parallel_maintenance_workers TO '{}';").format(
-                    sql.Identifier(self.db_config["user"]),
+                    sql.Identifier(self.connect_config["user"]),
                     index_param["max_parallel_workers"],
                 ),
             )
@@ -299,7 +310,7 @@ class PgVector(VectorDB):
             )
             self.cursor.execute(
                 sql.SQL("ALTER USER {} SET max_parallel_workers TO '{}';").format(
-                    sql.Identifier(self.db_config["user"]),
+                    sql.Identifier(self.connect_config["user"]),
                     index_param["max_parallel_workers"],
                 ),
             )
@@ -382,18 +393,36 @@ class PgVector(VectorDB):
             log.info(f"{self.name} client create table : {self.table_name}")

             # create table
-            self.cursor.execute(
-                sql.SQL(
-                    """
-                    CREATE TABLE IF NOT EXISTS public.{table_name}
-                    (id BIGINT PRIMARY KEY, embedding {table_quantization_type}({dim}));
-                    """
-                ).format(
-                    table_name=sql.Identifier(self.table_name),
-                    table_quantization_type=sql.SQL(index_param["table_quantization_type"]),
-                    dim=dim,
+            if self.with_scalar_labels:
+                self.cursor.execute(
+                    sql.SQL(
+                        """
+                        CREATE TABLE IF NOT EXISTS public.{table_name}
+                        ({primary_field} BIGINT PRIMARY KEY, embedding {table_quantization_type}({dim}), {label_field} VARCHAR(64));
+                        """, # noqa: E501
+                    ).format(
+                        table_name=sql.Identifier(self.table_name),
+                        table_quantization_type=sql.SQL(index_param["table_quantization_type"]),
+                        dim=dim,
+                        primary_field=sql.Identifier(self._primary_field),
+                        label_field=sql.Identifier(self._scalar_label_field),
+                    )
                 )
-            )
+            else:
+                self.cursor.execute(
+                    sql.SQL(
+                        """
+                        CREATE TABLE IF NOT EXISTS public.{table_name}
+                        ({primary_field} BIGINT PRIMARY KEY, embedding {table_quantization_type}({dim}));
+                        """
+                    ).format(
+                        table_name=sql.Identifier(self.table_name),
+                        table_quantization_type=sql.SQL(index_param["table_quantization_type"]),
+                        dim=dim,
+                        primary_field=sql.Identifier(self._primary_field),
+                    )
+                )
+
             self.cursor.execute(
                 sql.SQL(
                     "ALTER TABLE public.{table_name} ALTER COLUMN embedding SET STORAGE PLAIN;",
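When `with_scalar_labels` is set, the rendered DDL simply gains a `VARCHAR(64)` label column. A hedged example of the statement for the default table name, assuming a 768-dimensional, non-quantized table:

```python
# Hypothetical rendering of the labeled CREATE TABLE above; the table name is
# the config default, the dimension and column type are example assumptions.
create_labeled_table = """
CREATE TABLE IF NOT EXISTS public."vdbbench_table_test"
("id" BIGINT PRIMARY KEY, embedding vector(768), "label" VARCHAR(64));
"""
```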
@@ -404,14 +433,17 @@ class PgVector(VectorDB):
             log.warning(f"Failed to create pgvector table: {self.table_name} error: {e}")
             raise e from None

-    def insert_embeddings(
+    def insert_embeddings( # noqa: PLR0912
         self,
         embeddings: list[list[float]],
         metadata: list[int],
+        labels_data: list[str] | None = None,
         **kwargs: Any,
     ) -> tuple[int, Exception | None]:
         assert self.conn is not None, "Connection is not initialized"
         assert self.cursor is not None, "Cursor is not initialized"
+        if self.with_scalar_labels:
+            assert labels_data is not None, "labels_data should be provided if with_scalar_labels is set to True"

         index_param = self.case_config.index_param()

@@ -433,7 +465,10 @@ class PgVector(VectorDB):
                             embeddings_bit += "1"
                         else:
                             embeddings_bit += "0"
-                    copy.write_row((str(row), embeddings_bit))
+                    if self.with_scalar_labels:
+                        copy.write_row((str(row), embeddings_bit, labels_data[i]))
+                    else:
+                        copy.write_row((str(row), embeddings_bit))
             else:
                 with self.cursor.copy(
                     sql.SQL("COPY public.{table_name} FROM STDIN (FORMAT BINARY)").format(
@@ -441,29 +476,47 @@ class PgVector(VectorDB):
                     )
                 ) as copy:
                     if index_param["table_quantization_type"] == "halfvec":
-                        copy.set_types(["bigint", "halfvec"])
                         for i, row in enumerate(metadata_arr):
-                            copy.write_row((row, np.float16(embeddings_arr[i])))
+                            if self.with_scalar_labels:
+                                copy.set_types(["bigint", "halfvec", "varchar"])
+                                copy.write_row((row, np.float16(embeddings_arr[i]), labels_data[i]))
+                            else:
+                                copy.set_types(["bigint", "halfvec"])
+                                copy.write_row((row, np.float16(embeddings_arr[i])))
                     else:
-                        copy.set_types(["bigint", "vector"])
                         for i, row in enumerate(metadata_arr):
-                            copy.write_row((row, embeddings_arr[i]))
+                            if self.with_scalar_labels:
+                                copy.set_types(["bigint", "vector", "varchar"])
+                                copy.write_row((row, embeddings_arr[i], labels_data[i]))
+                            else:
+                                copy.set_types(["bigint", "vector"])
+                                copy.write_row((row, embeddings_arr[i]))
             self.conn.commit()

-            if kwargs.get("last_batch"):
-                self._post_insert()
-
             return len(metadata), None
         except Exception as e:
             log.warning(f"Failed to insert data into pgvector table ({self.table_name}), error: {e}")
             return 0, e

+    def prepare_filter(self, filters: Filter):
+        if filters.type == FilterOp.NonFilter:
+            self.where_clause = ""
+        elif filters.type == FilterOp.NumGE:
+            self.where_clause = f"WHERE {self._primary_field} >= {filters.int_value}"
+        elif filters.type == FilterOp.StrEqual:
+            self.where_clause = f"WHERE {self._scalar_label_field} = '{filters.label_value}'"
+        else:
+            msg = f"Not support Filter for PgVector - {filters}"
+            raise ValueError(msg)
+
+        self._search = self._generate_search_query()
+
     def search_embedding(
         self,
         query: list[float],
         k: int = 100,
-        filters: dict | None = None,
         timeout: int | None = None,
+        **kwargs: Any,
     ) -> list[int]:
         assert self.conn is not None, "Connection is not initialized"
         assert self.cursor is not None, "Cursor is not initialized"
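`prepare_filter` is called once per case: it fixes `self.where_clause` and regenerates the single prepared query. A small sketch of the clause each supported FilterOp maps to (field names follow the diff, the concrete values are examples):

```python
# Illustrative WHERE clauses matching the branches of prepare_filter above.
primary_field = "id"
scalar_label_field = "label"

int_value = 500_000        # example value for a FilterOp.NumGE filter
label_value = "label_1"    # example value for a FilterOp.StrEqual filter

where_clauses = {
    "NonFilter": "",
    "NumGE": f"WHERE {primary_field} >= {int_value}",
    "StrEqual": f"WHERE {scalar_label_field} = '{label_value}'",
}
# {"NonFilter": "", "NumGE": "WHERE id >= 500000", "StrEqual": "WHERE label = 'label_1'"}
```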
@@ -471,36 +524,10 @@ class PgVector(VectorDB):
         index_param = self.case_config.index_param()
         search_param = self.case_config.search_param()
         q = np.asarray(query)
-        if filters:
-            gt = filters.get("id")
-            if index_param["quantization_type"] == "bit" and search_param["reranking"]:
-                result = self.cursor.execute(
-                    self._filtered_search,
-                    (q, gt, q, k),
-                    prepare=True,
-                    binary=True,
-                )
-            else:
-                result = self.cursor.execute(
-                    self._filtered_search,
-                    (gt, q, k),
-                    prepare=True,
-                    binary=True,
-                )
-
-        elif index_param["quantization_type"] == "bit" and search_param["reranking"]:
-            result = self.cursor.execute(
-                self._unfiltered_search,
-                (q, q, k),
-                prepare=True,
-                binary=True,
-            )
-        else:
-            result = self.cursor.execute(
-                self._unfiltered_search,
-                (q, k),
-                prepare=True,
-                binary=True,
-            )
-
+        result = self.cursor.execute(
+            self._search,
+            (q, q, k) if index_param["quantization_type"] == "bit" and search_param["reranking"] else (q, k),
+            prepare=True,
+            binary=True,
+        )
         return [int(i[0]) for i in result.fetchall()]
vectordb_bench/cli/cli.py
CHANGED
@@ -183,9 +183,9 @@ def get_custom_case_config(parameters: dict) -> dict:
                 "with_gt": parameters["custom_dataset_with_gt"],
             },
         }
-    elif parameters["case_type"] == "NewIntFilterPerformanceCase"
+    elif parameters["case_type"] == "NewIntFilterPerformanceCase":
         custom_case_config = {
-            "dataset_with_size_type"
+            "dataset_with_size_type": parameters["dataset_with_size_type"],
             "filter_rate": parameters["filter_rate"],
         }
     return custom_case_config
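For `NewIntFilterPerformanceCase` the helper now carries both knobs through. A small sketch of the dict it produces when the CLI is left at the defaults shown in the options below (values are those documented defaults, assumed here for illustration):

```python
# Example of the custom_case_config built for NewIntFilterPerformanceCase,
# assuming the default --dataset-with-size-type and --filter-rate values.
parameters = {
    "case_type": "NewIntFilterPerformanceCase",
    "dataset_with_size_type": "Medium Cohere (768dim, 1M)",
    "filter_rate": 0.01,
}
custom_case_config = {
    "dataset_with_size_type": parameters["dataset_with_size_type"],
    "filter_rate": parameters["filter_rate"],
}
print(custom_case_config)
# {'dataset_with_size_type': 'Medium Cohere (768dim, 1M)', 'filter_rate': 0.01}
```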
@@ -425,10 +425,12 @@ class CommonTypedDict(TypedDict):
         str,
         click.option(
             "--dataset-with-size-type",
-            help="Dataset with size type for NewIntFilterPerformanceCase, you can use Medium Cohere (768dim, 1M)|
+            help="Dataset with size type for NewIntFilterPerformanceCase, you can use Medium Cohere (768dim, 1M)|"
+            "Large Cohere (768dim, 10M)|Medium Bioasq (1024dim, 1M)|Large Bioasq (1024dim, 10M)|"
+            "Large OpenAI (1536dim, 5M)|Medium OpenAI (1536dim, 500K)",
             default="Medium Cohere (768dim, 1M)",
             show_default=True,
-        )
+        ),
     ]
     filter_rate: Annotated[
         float,
@@ -437,9 +439,10 @@ class CommonTypedDict(TypedDict):
             help="Filter rate for NewIntFilterPerformanceCase",
             default=0.01,
             show_default=True,
-        )
+        ),
     ]

+
 class HNSWBaseTypedDict(TypedDict):
     m: Annotated[int | None, click.option("--m", type=int, help="hnsw m")]
     ef_construction: Annotated[
vectordb_bench/interface.py
CHANGED
@@ -279,7 +279,7 @@ class BenchMarkRunner:
                 p.send_signal(sig)
             except psutil.NoSuchProcess:
                 pass
-
+        _, alive = psutil.wait_procs(children, timeout=timeout, callback=on_terminate)

         for p in alive:
             log.warning(f"force killing child process: {p}")
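The restored line is the standard psutil terminate-then-wait-then-kill sequence for child processes, without which `alive` would be undefined. A self-contained sketch of that pattern (the timeout value and function name are examples, not the runner's):

```python
# Minimal sketch of the terminate / wait / kill pattern used by the runner.
import psutil

def stop_children(timeout: float = 5.0) -> None:
    children = psutil.Process().children(recursive=True)
    for p in children:
        try:
            p.terminate()  # ask politely first
        except psutil.NoSuchProcess:
            pass
    # Wait for them to exit; whatever survives the timeout gets killed.
    _, alive = psutil.wait_procs(children, timeout=timeout)
    for p in alive:
        p.kill()
```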
{vectordb_bench-1.0.8.dist-info → vectordb_bench-1.0.9.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vectordb-bench
-Version: 1.0.8
+Version: 1.0.9
 Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
 Author-email: XuanYang-cn <xuan.yang@zilliz.com>
 Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -375,12 +375,18 @@ Options:

 ### Run Hologres from command line

+It is recommended to use the following code for installation.
+```shell
+pip install vectordb-bench[hologres] "psycopg[binary]" pgvector
+```
+
 Execute tests for the index types: HGraph.

 ```shell
-vectordbbench hologreshgraph --host
-  --
-  --
+NUM_PER_BATCH=10000 vectordbbench hologreshgraph --host Hologres_Endpoint --port 80 \
+  --user ACCESS_ID --password ACCESS_KEY --database DATABASE_NAME \
+  --m 64 --ef-construction 400 --case-type Performance768D10M \
+  --index-type HGraph --ef-search 400 --k 10 --num-concurrency 1,60,70,75,80,90,95,100,110,120
 ```

 To list the options for Hologres, execute `vectordbbench hologreshgraph --help`, The following are some Hologres-specific command-line options.
{vectordb_bench-1.0.8.dist-info → vectordb_bench-1.0.9.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
 vectordb_bench/__init__.py,sha256=JkFWNtQCp6wKVFtBvf5oMwMTli6erkpzcbcYbnYPML4,2581
 vectordb_bench/__main__.py,sha256=2zZQJ9tg7gVCWWq9HaoJ8_hTR-3AXZgFHfJK4l88DFA,853
 vectordb_bench/base.py,sha256=AgavIF0P9ku_RmCRk1KKziba-wI4ZpA2aJvjJzNhRSs,129
-vectordb_bench/interface.py,sha256=
+vectordb_bench/interface.py,sha256=DSSyEu4GWubCZcf6rO7ybqorutAT8zVhGidIBEvZMIA,10183
 vectordb_bench/log_util.py,sha256=wDNaU_JBBOfKi_Z4vq7LDa0kOlLjoNNzDX3VZQn_Dxo,3239
 vectordb_bench/metric.py,sha256=c73EOPHGXwTInl85MNLPJb9oWrFRYAtp-e5qfFoxw34,3020
 vectordb_bench/models.py,sha256=aEZsn_Hs8oL04g7rzZcZ7NRu-HpXOr6qtOyEc_XM3ro,15198
@@ -65,8 +65,8 @@ vectordb_bench/backend/clients/pgvecto_rs/cli.py,sha256=n0cMbUrGS2jzCpusVExxRDJb
 vectordb_bench/backend/clients/pgvecto_rs/config.py,sha256=jWs3078s5chH37O94zSHoQ98ptLTYiJeHiLy6BQgTE4,4725
 vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py,sha256=ZSOPpQjLtWxpQz7-R24X-e2oVLHJsZeEmaOzfd5pELA,9828
 vectordb_bench/backend/clients/pgvector/cli.py,sha256=0MRBSZe6xwmE0dcb10rzOneo3YlOUmegjcpMn3wZ3yE,6770
-vectordb_bench/backend/clients/pgvector/config.py,sha256=
-vectordb_bench/backend/clients/pgvector/pgvector.py,sha256=
+vectordb_bench/backend/clients/pgvector/config.py,sha256=h2tNd6gQlv70erSa0Ni7jqUcA8uz3LYeLiopQbDatVI,10574
+vectordb_bench/backend/clients/pgvector/pgvector.py,sha256=q3s-fTy6teqAw06tQOy0peu_ymfzbZ1FuYnMRRxIsGU,23121
 vectordb_bench/backend/clients/pgvectorscale/cli.py,sha256=3XL2NdBXh9ug8SyUwPD6fGXkjYflahew5GO2xIza43g,3403
 vectordb_bench/backend/clients/pgvectorscale/config.py,sha256=ZMcRQPyCMzMJLXw56zODUGJmqOP-sOMA1entNsfE-Ck,3122
 vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py,sha256=NONFdcE-b-mt6GsRTru6UbMMu8iqX8PfRF43fY_AODw,10136
@@ -103,11 +103,11 @@ vectordb_bench/backend/runner/__init__.py,sha256=lkk-naYS2ai3kQLwNaqSsnudL9SVl0O
 vectordb_bench/backend/runner/mp_runner.py,sha256=qPjMkusf2Uicm8ROzhjsJ7dYI6SCbINDRBkM5_x5zi8,11191
 vectordb_bench/backend/runner/rate_runner.py,sha256=onC1FOT0LosunRq1CW8VRX7XoWF2xgslZVRGHYryTrk,5763
 vectordb_bench/backend/runner/read_write_runner.py,sha256=dqlaEkTb4mAZ45DRAlt-bIQMQ3aTZ6JDG778jb2eb8c,11410
-vectordb_bench/backend/runner/serial_runner.py,sha256=
+vectordb_bench/backend/runner/serial_runner.py,sha256=pivPYAv_GhCcbeJ5nCoe0MrGzl2XajLrUwssSXpoYRc,12790
 vectordb_bench/backend/runner/util.py,sha256=tjTFUxth6hNnVrlU82TqkHhfeZo4ymj7WlyK4zFyPTg,522
 vectordb_bench/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vectordb_bench/cli/batch_cli.py,sha256=lnVrIP1rweoqfFkrdTLzxnLzy713xP2AnW6xmhd4bu0,3658
-vectordb_bench/cli/cli.py,sha256=
+vectordb_bench/cli/cli.py,sha256=JnEiT2yQmz3Z2IjEYfgRDbXbwl_yGLJ7X3nuUQk3MZc,19735
 vectordb_bench/cli/vectordbbench.py,sha256=g61M3naKy87AeblCfh2P9U2jKWKmcNjburLNFsfK6QE,2067
 vectordb_bench/config-files/batch_sample_config.yml,sha256=3n0SfLgVWeboAZZcO8j_UP4A9CExHGPE8tOmtVPPFiA,370
 vectordb_bench/config-files/sample_config.yml,sha256=yw9ZgHczNi9PedNuTVxZKiOTI6AVoQS1h8INNgoDjPk,340
@@ -196,9 +196,9 @@ vectordb_bench/results/S3Vectors/result_20250722_standard_s3vectors.json,sha256=
 vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json,sha256=WBlfjmbO3R4G6F4lDuneEigffUyTU7ti1SyWoff3oNI,15497
 vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json,sha256=lXjudo-l-6H0EOIemoB5n4GddOOHJnwndrGwCJIH-EY,7865
 vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json,sha256=gZCnDanS5Yb6Uzvb0Q6wDxMl81UAoGzsZRHU8JwqNds,215610
-vectordb_bench-1.0.8.dist-info/licenses/LICENSE,sha256=
-vectordb_bench-1.0.8.dist-info/METADATA,sha256=
-vectordb_bench-1.0.8.dist-info/WHEEL,sha256=
-vectordb_bench-1.0.8.dist-info/entry_points.txt,sha256=
-vectordb_bench-1.0.8.dist-info/top_level.txt,sha256=
-vectordb_bench-1.0.8.dist-info/RECORD,,
+vectordb_bench-1.0.9.dist-info/licenses/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
+vectordb_bench-1.0.9.dist-info/METADATA,sha256=Dq846vYzNlG3npkwafTjx4QqKLDvirnjwIBbf7H_xbo,42251
+vectordb_bench-1.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+vectordb_bench-1.0.9.dist-info/entry_points.txt,sha256=Qzw6gVx96ui8esG21H6yHsI6nboEohRmV424TYhQNrA,113
+vectordb_bench-1.0.9.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
+vectordb_bench-1.0.9.dist-info/RECORD,,
{vectordb_bench-1.0.8.dist-info → vectordb_bench-1.0.9.dist-info}/WHEEL
File without changes
{vectordb_bench-1.0.8.dist-info → vectordb_bench-1.0.9.dist-info}/entry_points.txt
File without changes
{vectordb_bench-1.0.8.dist-info → vectordb_bench-1.0.9.dist-info}/licenses/LICENSE
File without changes
{vectordb_bench-1.0.8.dist-info → vectordb_bench-1.0.9.dist-info}/top_level.txt
File without changes