vectordb-bench 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/backend/clients/api.py +1 -0
- vectordb_bench/backend/clients/milvus/milvus.py +2 -3
- vectordb_bench/backend/clients/pgvector/config.py +157 -42
- vectordb_bench/backend/clients/pgvector/pgvector.py +243 -83
- vectordb_bench/backend/runner/serial_runner.py +0 -2
- vectordb_bench/backend/task_runner.py +1 -1
- vectordb_bench/frontend/components/run_test/caseSelector.py +6 -3
- vectordb_bench/frontend/const/dbCaseConfigs.py +118 -2
- vectordb_bench/models.py +6 -3
- {vectordb_bench-0.0.8.dist-info → vectordb_bench-0.0.9.dist-info}/METADATA +4 -3
- {vectordb_bench-0.0.8.dist-info → vectordb_bench-0.0.9.dist-info}/RECORD +15 -15
- {vectordb_bench-0.0.8.dist-info → vectordb_bench-0.0.9.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.8.dist-info → vectordb_bench-0.0.9.dist-info}/WHEEL +0 -0
- {vectordb_bench-0.0.8.dist-info → vectordb_bench-0.0.9.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.8.dist-info → vectordb_bench-0.0.9.dist-info}/top_level.txt +0 -0
@@ -89,6 +89,7 @@ class Milvus(VectorDB):
|
|
89
89
|
connections.disconnect("default")
|
90
90
|
|
91
91
|
def _optimize(self):
|
92
|
+
self._post_insert()
|
92
93
|
log.info(f"{self.name} optimizing before search")
|
93
94
|
try:
|
94
95
|
self.col.load()
|
@@ -116,7 +117,7 @@ class Milvus(VectorDB):
|
|
116
117
|
time.sleep(5)
|
117
118
|
|
118
119
|
wait_index()
|
119
|
-
|
120
|
+
|
120
121
|
# Skip compaction if use GPU indexType
|
121
122
|
if self.case_config.index in [IndexType.GPU_CAGRA, IndexType.GPU_IVF_FLAT, IndexType.GPU_IVF_PQ]:
|
122
123
|
log.debug("skip compaction for gpu index type.")
|
@@ -179,8 +180,6 @@ class Milvus(VectorDB):
|
|
179
180
|
]
|
180
181
|
res = self.col.insert(insert_data)
|
181
182
|
insert_count += len(res.primary_keys)
|
182
|
-
if kwargs.get("last_batch"):
|
183
|
-
self._post_insert()
|
184
183
|
except MilvusException as e:
|
185
184
|
log.info(f"Failed to insert data: {e}")
|
186
185
|
return (insert_count, e)
|
@@ -1,29 +1,62 @@
|
|
1
|
+
from abc import abstractmethod
|
2
|
+
from typing import Any, Mapping, Optional, Sequence, TypedDict
|
1
3
|
from pydantic import BaseModel, SecretStr
|
2
|
-
from
|
4
|
+
from typing_extensions import LiteralString
|
5
|
+
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
|
3
6
|
|
4
7
|
POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"
|
5
8
|
|
9
|
+
|
10
|
+
class PgVectorConfigDict(TypedDict):
|
11
|
+
"""These keys will be directly used as kwargs in psycopg connection string,
|
12
|
+
so the names must match exactly psycopg API"""
|
13
|
+
|
14
|
+
user: str
|
15
|
+
password: str
|
16
|
+
host: str
|
17
|
+
port: int
|
18
|
+
dbname: str
|
19
|
+
|
20
|
+
|
6
21
|
class PgVectorConfig(DBConfig):
|
7
|
-
user_name: SecretStr = "postgres"
|
22
|
+
user_name: SecretStr = SecretStr("postgres")
|
8
23
|
password: SecretStr
|
9
24
|
host: str = "localhost"
|
10
25
|
port: int = 5432
|
11
26
|
db_name: str
|
12
27
|
|
13
|
-
def to_dict(self) ->
|
28
|
+
def to_dict(self) -> PgVectorConfigDict:
|
14
29
|
user_str = self.user_name.get_secret_value()
|
15
30
|
pwd_str = self.password.get_secret_value()
|
16
31
|
return {
|
17
|
-
"host"
|
18
|
-
"port"
|
19
|
-
"dbname"
|
20
|
-
"user"
|
21
|
-
"password"
|
32
|
+
"host": self.host,
|
33
|
+
"port": self.port,
|
34
|
+
"dbname": self.db_name,
|
35
|
+
"user": user_str,
|
36
|
+
"password": pwd_str,
|
22
37
|
}
|
23
38
|
|
39
|
+
|
40
|
+
class PgVectorIndexParam(TypedDict):
|
41
|
+
metric: str
|
42
|
+
index_type: str
|
43
|
+
index_creation_with_options: Sequence[dict[str, Any]]
|
44
|
+
maintenance_work_mem: Optional[str]
|
45
|
+
max_parallel_workers: Optional[int]
|
46
|
+
|
47
|
+
|
48
|
+
class PgVectorSearchParam(TypedDict):
|
49
|
+
metric_fun_op: LiteralString
|
50
|
+
|
51
|
+
|
52
|
+
class PgVectorSessionCommands(TypedDict):
|
53
|
+
session_options: Sequence[dict[str, Any]]
|
54
|
+
|
55
|
+
|
24
56
|
class PgVectorIndexConfig(BaseModel, DBCaseConfig):
|
25
57
|
metric_type: MetricType | None = None
|
26
|
-
|
58
|
+
create_index_before_load: bool = False
|
59
|
+
create_index_after_load: bool = True
|
27
60
|
|
28
61
|
def parse_metric(self) -> str:
|
29
62
|
if self.metric_type == MetricType.L2:
|
@@ -32,7 +65,7 @@ class PgVectorIndexConfig(BaseModel, DBCaseConfig):
|
|
32
65
|
return "vector_ip_ops"
|
33
66
|
return "vector_cosine_ops"
|
34
67
|
|
35
|
-
def parse_metric_fun_op(self) ->
|
68
|
+
def parse_metric_fun_op(self) -> LiteralString:
|
36
69
|
if self.metric_type == MetricType.L2:
|
37
70
|
return "<->"
|
38
71
|
elif self.metric_type == MetricType.IP:
|
@@ -46,55 +79,137 @@ class PgVectorIndexConfig(BaseModel, DBCaseConfig):
|
|
46
79
|
return "max_inner_product"
|
47
80
|
return "cosine_distance"
|
48
81
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
82
|
+
@abstractmethod
|
83
|
+
def index_param(self) -> PgVectorIndexParam:
|
84
|
+
...
|
85
|
+
|
86
|
+
@abstractmethod
|
87
|
+
def search_param(self) -> PgVectorSearchParam:
|
88
|
+
...
|
89
|
+
|
90
|
+
@abstractmethod
|
91
|
+
def session_param(self) -> PgVectorSessionCommands:
|
92
|
+
...
|
93
|
+
|
94
|
+
@staticmethod
|
95
|
+
def _optionally_build_with_options(with_options: Mapping[str, Any]) -> Sequence[dict[str, Any]]:
|
96
|
+
"""Walk through mappings, creating a List of {key1 = value} pairs. That will be used to build a where clause"""
|
97
|
+
options = []
|
98
|
+
for option_name, value in with_options.items():
|
99
|
+
if value is not None:
|
100
|
+
options.append(
|
101
|
+
{
|
102
|
+
"option_name": option_name,
|
103
|
+
"val": str(value),
|
104
|
+
}
|
105
|
+
)
|
106
|
+
return options
|
107
|
+
|
108
|
+
@staticmethod
|
109
|
+
def _optionally_build_set_options(
|
110
|
+
set_mapping: Mapping[str, Any]
|
111
|
+
) -> Sequence[dict[str, Any]]:
|
112
|
+
"""Walk through options, creating 'SET 'key1 = "value1";' commands"""
|
113
|
+
session_options = []
|
114
|
+
for setting_name, value in set_mapping.items():
|
115
|
+
if value:
|
116
|
+
session_options.append(
|
117
|
+
{"parameter": {
|
118
|
+
"setting_name": setting_name,
|
119
|
+
"val": str(value),
|
120
|
+
},
|
121
|
+
}
|
122
|
+
)
|
123
|
+
return session_options
|
124
|
+
|
125
|
+
|
126
|
+
class PgVectorIVFFlatConfig(PgVectorIndexConfig):
|
127
|
+
"""
|
128
|
+
An IVFFlat index divides vectors into lists, and then searches a subset of those lists that are
|
129
|
+
closest to the query vector. It has faster build times and uses less memory than HNSW,
|
130
|
+
but has lower query performance (in terms of speed-recall tradeoff).
|
131
|
+
|
132
|
+
Three keys to achieving good recall are:
|
133
|
+
|
134
|
+
Create the index after the table has some data
|
135
|
+
Choose an appropriate number of lists - a good place to start is rows / 1000 for up to 1M rows and sqrt(rows) for
|
136
|
+
over 1M rows.
|
137
|
+
When querying, specify an appropriate number of probes (higher is better for recall, lower is better for speed) -
|
138
|
+
a good place to start is sqrt(lists)
|
139
|
+
"""
|
140
|
+
|
141
|
+
lists: int | None
|
142
|
+
probes: int | None
|
143
|
+
index: IndexType = IndexType.ES_IVFFlat
|
144
|
+
maintenance_work_mem: Optional[str] = None
|
145
|
+
max_parallel_workers: Optional[int] = None
|
146
|
+
|
147
|
+
def index_param(self) -> PgVectorIndexParam:
|
148
|
+
index_parameters = {"lists": self.lists}
|
58
149
|
return {
|
59
|
-
"
|
150
|
+
"metric": self.parse_metric(),
|
60
151
|
"index_type": self.index.value,
|
61
|
-
"
|
152
|
+
"index_creation_with_options": self._optionally_build_with_options(
|
153
|
+
index_parameters
|
154
|
+
),
|
155
|
+
"maintenance_work_mem": self.maintenance_work_mem,
|
156
|
+
"max_parallel_workers": self.max_parallel_workers,
|
62
157
|
}
|
63
158
|
|
64
|
-
def
|
159
|
+
def search_param(self) -> PgVectorSearchParam:
|
65
160
|
return {
|
66
|
-
"
|
67
|
-
"efConstruction" : self.efConstruction,
|
68
|
-
"metric" : self.parse_metric()
|
161
|
+
"metric_fun_op": self.parse_metric_fun_op(),
|
69
162
|
}
|
70
163
|
|
71
|
-
def
|
164
|
+
def session_param(self) -> PgVectorSessionCommands:
|
165
|
+
session_parameters = {"ivfflat.probes": self.probes}
|
72
166
|
return {
|
73
|
-
"
|
74
|
-
"metric_fun" : self.parse_metric_fun_str(),
|
75
|
-
"metric_fun_op" : self.parse_metric_fun_op(),
|
167
|
+
"session_options": self._optionally_build_set_options(session_parameters)
|
76
168
|
}
|
77
169
|
|
78
170
|
|
79
|
-
class
|
80
|
-
|
81
|
-
|
82
|
-
|
171
|
+
class PgVectorHNSWConfig(PgVectorIndexConfig):
|
172
|
+
"""
|
173
|
+
An HNSW index creates a multilayer graph. It has better query performance than IVFFlat (in terms of
|
174
|
+
speed-recall tradeoff), but has slower build times and uses more memory. Also, an index can be
|
175
|
+
created without any data in the table since there isn't a training step like IVFFlat.
|
176
|
+
"""
|
177
|
+
|
178
|
+
m: int | None # DETAIL: Valid values are between "2" and "100".
|
179
|
+
ef_construction: (
|
180
|
+
int | None
|
181
|
+
) # ef_construction must be greater than or equal to 2 * m
|
182
|
+
ef_search: int | None
|
183
|
+
index: IndexType = IndexType.ES_HNSW
|
184
|
+
maintenance_work_mem: Optional[str] = None
|
185
|
+
max_parallel_workers: Optional[int] = None
|
186
|
+
|
187
|
+
def index_param(self) -> PgVectorIndexParam:
|
188
|
+
index_parameters = {"m": self.m, "ef_construction": self.ef_construction}
|
189
|
+
return {
|
190
|
+
"metric": self.parse_metric(),
|
191
|
+
"index_type": self.index.value,
|
192
|
+
"index_creation_with_options": self._optionally_build_with_options(
|
193
|
+
index_parameters
|
194
|
+
),
|
195
|
+
"maintenance_work_mem": self.maintenance_work_mem,
|
196
|
+
"max_parallel_workers": self.max_parallel_workers,
|
197
|
+
}
|
83
198
|
|
84
|
-
def
|
199
|
+
def search_param(self) -> PgVectorSearchParam:
|
85
200
|
return {
|
86
|
-
"
|
87
|
-
"metric" : self.parse_metric()
|
201
|
+
"metric_fun_op": self.parse_metric_fun_op(),
|
88
202
|
}
|
89
203
|
|
90
|
-
def
|
204
|
+
def session_param(self) -> PgVectorSessionCommands:
|
205
|
+
session_parameters = {"hnsw.ef_search": self.ef_search}
|
91
206
|
return {
|
92
|
-
"
|
93
|
-
"metric_fun" : self.parse_metric_fun_str(),
|
94
|
-
"metric_fun_op" : self.parse_metric_fun_op(),
|
207
|
+
"session_options": self._optionally_build_set_options(session_parameters)
|
95
208
|
}
|
96
209
|
|
210
|
+
|
97
211
|
_pgvector_case_config = {
|
98
|
-
|
99
|
-
|
212
|
+
IndexType.HNSW: PgVectorHNSWConfig,
|
213
|
+
IndexType.ES_HNSW: PgVectorHNSWConfig,
|
214
|
+
IndexType.IVFFlat: PgVectorIVFFlatConfig,
|
100
215
|
}
|
@@ -1,25 +1,36 @@
|
|
1
1
|
"""Wrapper around the Pgvector vector database over VectorDB"""
|
2
2
|
|
3
|
-
import io
|
4
3
|
import logging
|
4
|
+
import pprint
|
5
5
|
from contextlib import contextmanager
|
6
|
-
from typing import Any
|
7
|
-
import pandas as pd
|
8
|
-
import psycopg2
|
9
|
-
import psycopg2.extras
|
6
|
+
from typing import Any, Generator, Optional, Tuple, Sequence
|
10
7
|
|
11
|
-
|
8
|
+
import numpy as np
|
9
|
+
import psycopg
|
10
|
+
from pgvector.psycopg import register_vector
|
11
|
+
from psycopg import Connection, Cursor, sql
|
12
|
+
|
13
|
+
from ..api import VectorDB
|
14
|
+
from .config import PgVectorConfigDict, PgVectorIndexConfig
|
15
|
+
|
16
|
+
log = logging.getLogger(__name__)
|
12
17
|
|
13
|
-
log = logging.getLogger(__name__)
|
14
18
|
|
15
19
|
class PgVector(VectorDB):
|
16
|
-
"""
|
20
|
+
"""Use psycopg instructions"""
|
21
|
+
|
22
|
+
conn: psycopg.Connection[Any] | None = None
|
23
|
+
cursor: psycopg.Cursor[Any] | None = None
|
24
|
+
|
25
|
+
# TODO add filters support
|
26
|
+
_unfiltered_search: sql.Composed
|
27
|
+
|
17
28
|
def __init__(
|
18
29
|
self,
|
19
30
|
dim: int,
|
20
|
-
db_config:
|
21
|
-
db_case_config:
|
22
|
-
collection_name: str = "
|
31
|
+
db_config: PgVectorConfigDict,
|
32
|
+
db_case_config: PgVectorIndexConfig,
|
33
|
+
collection_name: str = "pg_vector_collection",
|
23
34
|
drop_old: bool = False,
|
24
35
|
**kwargs,
|
25
36
|
):
|
@@ -29,44 +40,89 @@ class PgVector(VectorDB):
|
|
29
40
|
self.table_name = collection_name
|
30
41
|
self.dim = dim
|
31
42
|
|
32
|
-
self._index_name = "
|
43
|
+
self._index_name = "pgvector_index"
|
33
44
|
self._primary_field = "id"
|
34
45
|
self._vector_field = "embedding"
|
35
46
|
|
36
47
|
# construct basic units
|
37
|
-
self.conn =
|
38
|
-
|
39
|
-
self.cursor = self.conn.cursor()
|
40
|
-
|
48
|
+
self.conn, self.cursor = self._create_connection(**self.db_config)
|
49
|
+
|
41
50
|
# create vector extension
|
42
|
-
self.cursor.execute(
|
51
|
+
self.cursor.execute("CREATE EXTENSION IF NOT EXISTS vector")
|
43
52
|
self.conn.commit()
|
44
|
-
|
45
|
-
|
46
|
-
|
53
|
+
|
54
|
+
log.info(f"{self.name} config values: {self.db_config}\n{self.case_config}")
|
55
|
+
if not any(
|
56
|
+
(
|
57
|
+
self.case_config.create_index_before_load,
|
58
|
+
self.case_config.create_index_after_load,
|
59
|
+
)
|
60
|
+
):
|
61
|
+
err = f"{self.name} config must create an index using create_index_before_load and/or create_index_after_load"
|
62
|
+
log.error(err)
|
63
|
+
raise RuntimeError(
|
64
|
+
f"{err}\n{pprint.pformat(self.db_config)}\n{pprint.pformat(self.case_config)}"
|
65
|
+
)
|
66
|
+
|
67
|
+
if drop_old:
|
47
68
|
# self.pg_table.drop(pg_engine, checkfirst=True)
|
48
69
|
self._drop_index()
|
49
70
|
self._drop_table()
|
50
71
|
self._create_table(dim)
|
51
|
-
self.
|
52
|
-
|
72
|
+
if self.case_config.create_index_before_load:
|
73
|
+
self._create_index()
|
74
|
+
|
53
75
|
self.cursor.close()
|
54
76
|
self.conn.close()
|
55
77
|
self.cursor = None
|
56
78
|
self.conn = None
|
57
79
|
|
80
|
+
@staticmethod
|
81
|
+
def _create_connection(**kwargs) -> Tuple[Connection, Cursor]:
|
82
|
+
conn = psycopg.connect(**kwargs)
|
83
|
+
register_vector(conn)
|
84
|
+
conn.autocommit = False
|
85
|
+
cursor = conn.cursor()
|
86
|
+
|
87
|
+
assert conn is not None, "Connection is not initialized"
|
88
|
+
assert cursor is not None, "Cursor is not initialized"
|
89
|
+
|
90
|
+
return conn, cursor
|
91
|
+
|
58
92
|
@contextmanager
|
59
|
-
def init(self) -> None:
|
93
|
+
def init(self) -> Generator[None, None, None]:
|
60
94
|
"""
|
61
95
|
Examples:
|
62
96
|
>>> with self.init():
|
63
97
|
>>> self.insert_embeddings()
|
64
98
|
>>> self.search_embedding()
|
65
99
|
"""
|
66
|
-
|
67
|
-
self.conn.
|
68
|
-
|
69
|
-
|
100
|
+
|
101
|
+
self.conn, self.cursor = self._create_connection(**self.db_config)
|
102
|
+
|
103
|
+
# index configuration may have commands defined that we should set during each client session
|
104
|
+
session_options: Sequence[dict[str, Any]] = self.case_config.session_param()["session_options"]
|
105
|
+
|
106
|
+
if len(session_options) > 0:
|
107
|
+
for setting in session_options:
|
108
|
+
command = sql.SQL("SET {setting_name} " + "= {val};").format(
|
109
|
+
setting_name=sql.Identifier(setting['parameter']['setting_name']),
|
110
|
+
val=sql.Identifier(str(setting['parameter']['val'])),
|
111
|
+
)
|
112
|
+
log.debug(command.as_string(self.cursor))
|
113
|
+
self.cursor.execute(command)
|
114
|
+
self.conn.commit()
|
115
|
+
|
116
|
+
self._unfiltered_search = sql.Composed(
|
117
|
+
[
|
118
|
+
sql.SQL("SELECT id FROM public.{} ORDER BY embedding ").format(
|
119
|
+
sql.Identifier(self.table_name)
|
120
|
+
),
|
121
|
+
sql.SQL(self.case_config.search_param()["metric_fun_op"]),
|
122
|
+
sql.SQL(" %s::vector LIMIT %s::int"),
|
123
|
+
]
|
124
|
+
)
|
125
|
+
|
70
126
|
try:
|
71
127
|
yield
|
72
128
|
finally:
|
@@ -74,61 +130,170 @@ class PgVector(VectorDB):
|
|
74
130
|
self.conn.close()
|
75
131
|
self.cursor = None
|
76
132
|
self.conn = None
|
77
|
-
|
133
|
+
|
78
134
|
def _drop_table(self):
|
79
135
|
assert self.conn is not None, "Connection is not initialized"
|
80
136
|
assert self.cursor is not None, "Cursor is not initialized"
|
81
|
-
|
82
|
-
|
137
|
+
log.info(f"{self.name} client drop table : {self.table_name}")
|
138
|
+
|
139
|
+
self.cursor.execute(
|
140
|
+
sql.SQL("DROP TABLE IF EXISTS public.{table_name}").format(
|
141
|
+
table_name=sql.Identifier(self.table_name)
|
142
|
+
)
|
143
|
+
)
|
83
144
|
self.conn.commit()
|
84
|
-
|
145
|
+
|
85
146
|
def ready_to_load(self):
|
86
147
|
pass
|
87
148
|
|
88
149
|
def optimize(self):
|
89
|
-
|
90
|
-
|
150
|
+
self._post_insert()
|
151
|
+
|
91
152
|
def _post_insert(self):
|
92
153
|
log.info(f"{self.name} post insert before optimize")
|
93
|
-
self.
|
94
|
-
|
154
|
+
if self.case_config.create_index_after_load:
|
155
|
+
self._drop_index()
|
156
|
+
self._create_index()
|
95
157
|
|
96
|
-
def ready_to_search(self):
|
97
|
-
pass
|
98
|
-
|
99
158
|
def _drop_index(self):
|
100
159
|
assert self.conn is not None, "Connection is not initialized"
|
101
160
|
assert self.cursor is not None, "Cursor is not initialized"
|
102
|
-
|
103
|
-
|
161
|
+
log.info(f"{self.name} client drop index : {self._index_name}")
|
162
|
+
|
163
|
+
drop_index_sql = sql.SQL("DROP INDEX IF EXISTS {index_name}").format(
|
164
|
+
index_name=sql.Identifier(self._index_name)
|
165
|
+
)
|
166
|
+
log.debug(drop_index_sql.as_string(self.cursor))
|
167
|
+
self.cursor.execute(drop_index_sql)
|
104
168
|
self.conn.commit()
|
105
|
-
|
169
|
+
|
170
|
+
def _set_parallel_index_build_param(self):
|
171
|
+
assert self.conn is not None, "Connection is not initialized"
|
172
|
+
assert self.cursor is not None, "Cursor is not initialized"
|
173
|
+
|
174
|
+
index_param = self.case_config.index_param()
|
175
|
+
|
176
|
+
if index_param["maintenance_work_mem"] is not None:
|
177
|
+
self.cursor.execute(
|
178
|
+
sql.SQL("SET maintenance_work_mem TO {};").format(
|
179
|
+
index_param["maintenance_work_mem"]
|
180
|
+
)
|
181
|
+
)
|
182
|
+
self.cursor.execute(
|
183
|
+
sql.SQL("ALTER USER {} SET maintenance_work_mem TO {};").format(
|
184
|
+
sql.Identifier(self.db_config["user"]),
|
185
|
+
index_param["maintenance_work_mem"],
|
186
|
+
)
|
187
|
+
)
|
188
|
+
self.conn.commit()
|
189
|
+
|
190
|
+
if index_param["max_parallel_workers"] is not None:
|
191
|
+
self.cursor.execute(
|
192
|
+
sql.SQL("SET max_parallel_maintenance_workers TO '{}';").format(
|
193
|
+
index_param["max_parallel_workers"]
|
194
|
+
)
|
195
|
+
)
|
196
|
+
self.cursor.execute(
|
197
|
+
sql.SQL(
|
198
|
+
"ALTER USER {} SET max_parallel_maintenance_workers TO '{}';"
|
199
|
+
).format(
|
200
|
+
sql.Identifier(self.db_config["user"]),
|
201
|
+
index_param["max_parallel_workers"],
|
202
|
+
)
|
203
|
+
)
|
204
|
+
self.cursor.execute(
|
205
|
+
sql.SQL("SET max_parallel_workers TO '{}';").format(
|
206
|
+
index_param["max_parallel_workers"]
|
207
|
+
)
|
208
|
+
)
|
209
|
+
self.cursor.execute(
|
210
|
+
sql.SQL(
|
211
|
+
"ALTER USER {} SET max_parallel_workers TO '{}';"
|
212
|
+
).format(
|
213
|
+
sql.Identifier(self.db_config["user"]),
|
214
|
+
index_param["max_parallel_workers"],
|
215
|
+
)
|
216
|
+
)
|
217
|
+
self.cursor.execute(
|
218
|
+
sql.SQL(
|
219
|
+
"ALTER TABLE {} SET (parallel_workers = {});"
|
220
|
+
).format(
|
221
|
+
sql.Identifier(self.table_name),
|
222
|
+
index_param["max_parallel_workers"],
|
223
|
+
)
|
224
|
+
)
|
225
|
+
self.conn.commit()
|
226
|
+
|
227
|
+
results = self.cursor.execute(
|
228
|
+
sql.SQL("SHOW max_parallel_maintenance_workers;")
|
229
|
+
).fetchall()
|
230
|
+
results.extend(
|
231
|
+
self.cursor.execute(sql.SQL("SHOW max_parallel_workers;")).fetchall()
|
232
|
+
)
|
233
|
+
results.extend(
|
234
|
+
self.cursor.execute(sql.SQL("SHOW maintenance_work_mem;")).fetchall()
|
235
|
+
)
|
236
|
+
log.info(f"{self.name} parallel index creation parameters: {results}")
|
237
|
+
|
106
238
|
def _create_index(self):
|
107
239
|
assert self.conn is not None, "Connection is not initialized"
|
108
240
|
assert self.cursor is not None, "Cursor is not initialized"
|
109
|
-
|
241
|
+
log.info(f"{self.name} client create index : {self._index_name}")
|
242
|
+
|
110
243
|
index_param = self.case_config.index_param()
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
244
|
+
self._set_parallel_index_build_param()
|
245
|
+
options = []
|
246
|
+
for option in index_param["index_creation_with_options"]:
|
247
|
+
if option['val'] is not None:
|
248
|
+
options.append(
|
249
|
+
sql.SQL("{option_name} = {val}").format(
|
250
|
+
option_name=sql.Identifier(option['option_name']),
|
251
|
+
val=sql.Identifier(str(option['val'])),
|
252
|
+
)
|
253
|
+
)
|
254
|
+
if any(options):
|
255
|
+
with_clause = sql.SQL("WITH ({});").format(sql.SQL(", ").join(options))
|
117
256
|
else:
|
118
|
-
|
257
|
+
with_clause = sql.Composed(())
|
258
|
+
|
259
|
+
index_create_sql = sql.SQL(
|
260
|
+
"CREATE INDEX IF NOT EXISTS {index_name} ON public.{table_name} USING {index_type} (embedding {embedding_metric})"
|
261
|
+
).format(
|
262
|
+
index_name=sql.Identifier(self._index_name),
|
263
|
+
table_name=sql.Identifier(self.table_name),
|
264
|
+
index_type=sql.Identifier(index_param["index_type"]),
|
265
|
+
embedding_metric=sql.Identifier(index_param["metric"]),
|
266
|
+
)
|
267
|
+
index_create_sql_with_with_clause = (
|
268
|
+
index_create_sql + with_clause
|
269
|
+
).join(" ")
|
270
|
+
log.debug(index_create_sql_with_with_clause.as_string(self.cursor))
|
271
|
+
self.cursor.execute(index_create_sql_with_with_clause)
|
119
272
|
self.conn.commit()
|
120
|
-
|
121
|
-
def _create_table(self, dim
|
273
|
+
|
274
|
+
def _create_table(self, dim: int):
|
122
275
|
assert self.conn is not None, "Connection is not initialized"
|
123
276
|
assert self.cursor is not None, "Cursor is not initialized"
|
124
|
-
|
277
|
+
|
125
278
|
try:
|
279
|
+
log.info(f"{self.name} client create table : {self.table_name}")
|
280
|
+
|
126
281
|
# create table
|
127
|
-
self.cursor.execute(
|
128
|
-
|
282
|
+
self.cursor.execute(
|
283
|
+
sql.SQL(
|
284
|
+
"CREATE TABLE IF NOT EXISTS public.{table_name} (id BIGINT PRIMARY KEY, embedding vector({dim}));"
|
285
|
+
).format(table_name=sql.Identifier(self.table_name), dim=dim)
|
286
|
+
)
|
287
|
+
self.cursor.execute(
|
288
|
+
sql.SQL(
|
289
|
+
"ALTER TABLE public.{table_name} ALTER COLUMN embedding SET STORAGE PLAIN;"
|
290
|
+
).format(table_name=sql.Identifier(self.table_name))
|
291
|
+
)
|
129
292
|
self.conn.commit()
|
130
293
|
except Exception as e:
|
131
|
-
log.warning(
|
294
|
+
log.warning(
|
295
|
+
f"Failed to create pgvector table: {self.table_name} error: {e}"
|
296
|
+
)
|
132
297
|
raise e from None
|
133
298
|
|
134
299
|
def insert_embeddings(
|
@@ -136,31 +301,35 @@ class PgVector(VectorDB):
|
|
136
301
|
embeddings: list[list[float]],
|
137
302
|
metadata: list[int],
|
138
303
|
**kwargs: Any,
|
139
|
-
) ->
|
304
|
+
) -> Tuple[int, Optional[Exception]]:
|
140
305
|
assert self.conn is not None, "Connection is not initialized"
|
141
306
|
assert self.cursor is not None, "Cursor is not initialized"
|
142
307
|
|
143
308
|
try:
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
309
|
+
metadata_arr = np.array(metadata)
|
310
|
+
embeddings_arr = np.array(embeddings)
|
311
|
+
|
312
|
+
with self.cursor.copy(
|
313
|
+
sql.SQL("COPY public.{table_name} FROM STDIN (FORMAT BINARY)").format(
|
314
|
+
table_name=sql.Identifier(self.table_name)
|
315
|
+
)
|
316
|
+
) as copy:
|
317
|
+
copy.set_types(["bigint", "vector"])
|
318
|
+
for i, row in enumerate(metadata_arr):
|
319
|
+
copy.write_row((row, embeddings_arr[i]))
|
153
320
|
self.conn.commit()
|
154
|
-
|
321
|
+
|
155
322
|
if kwargs.get("last_batch"):
|
156
323
|
self._post_insert()
|
157
|
-
|
324
|
+
|
158
325
|
return len(metadata), None
|
159
326
|
except Exception as e:
|
160
|
-
log.warning(
|
327
|
+
log.warning(
|
328
|
+
f"Failed to insert data into pgvector table ({self.table_name}), error: {e}"
|
329
|
+
)
|
161
330
|
return 0, e
|
162
331
|
|
163
|
-
def search_embedding(
|
332
|
+
def search_embedding(
|
164
333
|
self,
|
165
334
|
query: list[float],
|
166
335
|
k: int = 100,
|
@@ -170,18 +339,9 @@ class PgVector(VectorDB):
|
|
170
339
|
assert self.conn is not None, "Connection is not initialized"
|
171
340
|
assert self.cursor is not None, "Cursor is not initialized"
|
172
341
|
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
self.cursor.execute(f"SELECT id FROM public.\"{self.table_name}\" ORDER BY embedding {search_param['metric_fun_op']} '{query}' LIMIT {k};")
|
178
|
-
elif self.case_config.index == IndexType.IVFFlat:
|
179
|
-
self.cursor.execute(f'SET ivfflat.probes = {search_param["probes"]}')
|
180
|
-
self.cursor.execute(f"SELECT id FROM public.\"{self.table_name}\" ORDER BY embedding {search_param['metric_fun_op']} '{query}' LIMIT {k};")
|
181
|
-
else:
|
182
|
-
assert "Invalid index type {self.case_config.index}"
|
183
|
-
self.conn.commit()
|
184
|
-
result = self.cursor.fetchall()
|
342
|
+
# TODO add filters support
|
343
|
+
result = self.cursor.execute(
|
344
|
+
self._unfiltered_search, (query, k), prepare=True, binary=True
|
345
|
+
)
|
185
346
|
|
186
|
-
return [int(i[0]) for i in result]
|
187
|
-
|
347
|
+
return [int(i[0]) for i in result.fetchall()]
|
@@ -46,11 +46,9 @@ class SerialInsertRunner:
|
|
46
46
|
del(emb_np)
|
47
47
|
log.debug(f"batch dataset size: {len(all_embeddings)}, {len(all_metadata)}")
|
48
48
|
|
49
|
-
last_batch = self.dataset.data.size - count == len(all_metadata)
|
50
49
|
insert_count, error = self.db.insert_embeddings(
|
51
50
|
embeddings=all_embeddings,
|
52
51
|
metadata=all_metadata,
|
53
|
-
last_batch=last_batch,
|
54
52
|
)
|
55
53
|
if error is not None:
|
56
54
|
raise error
|
@@ -140,8 +140,8 @@ class CaseRunner(BaseModel):
|
|
140
140
|
)
|
141
141
|
|
142
142
|
self._init_search_runner()
|
143
|
-
m.recall, m.serial_latency_p99 = self._serial_search()
|
144
143
|
m.qps = self._conc_search()
|
144
|
+
m.recall, m.serial_latency_p99 = self._serial_search()
|
145
145
|
except Exception as e:
|
146
146
|
log.warning(f"Failed to run performance case, reason = {e}")
|
147
147
|
traceback.print_exc()
|
@@ -65,25 +65,28 @@ def caseConfigSetting(st, allCaseConfigs, case, activedDbList):
|
|
65
65
|
key = "%s-%s-%s" % (db, case, config.label.value)
|
66
66
|
if config.inputType == InputType.Text:
|
67
67
|
caseConfig[config.label] = column.text_input(
|
68
|
-
config.label.value,
|
68
|
+
config.displayLabel if config.displayLabel else config.label.value,
|
69
69
|
key=key,
|
70
|
+
help=config.inputHelp,
|
70
71
|
value=config.inputConfig["value"],
|
71
72
|
)
|
72
73
|
elif config.inputType == InputType.Option:
|
73
74
|
caseConfig[config.label] = column.selectbox(
|
74
|
-
config.label.value,
|
75
|
+
config.displayLabel if config.displayLabel else config.label.value,
|
75
76
|
config.inputConfig["options"],
|
76
77
|
key=key,
|
78
|
+
help=config.inputHelp,
|
77
79
|
)
|
78
80
|
elif config.inputType == InputType.Number:
|
79
81
|
caseConfig[config.label] = column.number_input(
|
80
|
-
config.label.value,
|
82
|
+
config.displayLabel if config.displayLabel else config.label.value,
|
81
83
|
# format="%d",
|
82
84
|
step=config.inputConfig.get("step", 1),
|
83
85
|
min_value=config.inputConfig["min"],
|
84
86
|
max_value=config.inputConfig["max"],
|
85
87
|
key=key,
|
86
88
|
value=config.inputConfig["value"],
|
89
|
+
help=config.inputHelp,
|
87
90
|
)
|
88
91
|
k += 1
|
89
92
|
if k == 0:
|
@@ -49,6 +49,8 @@ class CaseConfigInput(BaseModel):
|
|
49
49
|
label: CaseConfigParamType
|
50
50
|
inputType: InputType = InputType.Text
|
51
51
|
inputConfig: dict = {}
|
52
|
+
inputHelp: str = ""
|
53
|
+
displayLabel: str = ""
|
52
54
|
# todo type should be a function
|
53
55
|
isDisplayed: typing.Any = lambda x: True
|
54
56
|
|
@@ -71,6 +73,18 @@ CaseConfigParamInput_IndexType = CaseConfigInput(
|
|
71
73
|
},
|
72
74
|
)
|
73
75
|
|
76
|
+
CaseConfigParamInput_IndexType_PgVector = CaseConfigInput(
|
77
|
+
label=CaseConfigParamType.IndexType,
|
78
|
+
inputHelp="Select Index Type",
|
79
|
+
inputType=InputType.Option,
|
80
|
+
inputConfig={
|
81
|
+
"options": [
|
82
|
+
IndexType.HNSW.value,
|
83
|
+
IndexType.IVFFlat.value,
|
84
|
+
],
|
85
|
+
},
|
86
|
+
)
|
87
|
+
|
74
88
|
CaseConfigParamInput_M = CaseConfigInput(
|
75
89
|
label=CaseConfigParamType.M,
|
76
90
|
inputType=InputType.Number,
|
@@ -83,6 +97,19 @@ CaseConfigParamInput_M = CaseConfigInput(
|
|
83
97
|
== IndexType.HNSW.value,
|
84
98
|
)
|
85
99
|
|
100
|
+
CaseConfigParamInput_m = CaseConfigInput(
|
101
|
+
label=CaseConfigParamType.m,
|
102
|
+
inputType=InputType.Number,
|
103
|
+
inputConfig={
|
104
|
+
"min": 4,
|
105
|
+
"max": 64,
|
106
|
+
"value": 16,
|
107
|
+
},
|
108
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
109
|
+
== IndexType.HNSW.value,
|
110
|
+
)
|
111
|
+
|
112
|
+
|
86
113
|
CaseConfigParamInput_EFConstruction_Milvus = CaseConfigInput(
|
87
114
|
label=CaseConfigParamType.EFConstruction,
|
88
115
|
inputType=InputType.Number,
|
@@ -115,6 +142,30 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
|
|
115
142
|
},
|
116
143
|
)
|
117
144
|
|
145
|
+
CaseConfigParamInput_maintenance_work_mem_PgVector = CaseConfigInput(
|
146
|
+
label=CaseConfigParamType.maintenance_work_mem,
|
147
|
+
inputHelp="Recommended value: 1.33x the index size, not to exceed the available free memory."
|
148
|
+
"Specify in gigabytes. e.g. 8GB",
|
149
|
+
inputType=InputType.Text,
|
150
|
+
inputConfig={
|
151
|
+
"value": "8GB",
|
152
|
+
},
|
153
|
+
)
|
154
|
+
|
155
|
+
CaseConfigParamInput_max_parallel_workers_PgVector = CaseConfigInput(
|
156
|
+
label=CaseConfigParamType.max_parallel_workers,
|
157
|
+
displayLabel="Max parallel workers",
|
158
|
+
inputHelp="Recommended value: (cpu cores - 1). This will set the parameters: max_parallel_maintenance_workers,"
|
159
|
+
" max_parallel_workers & table(parallel_workers)",
|
160
|
+
inputType=InputType.Number,
|
161
|
+
inputConfig={
|
162
|
+
"min": 0,
|
163
|
+
"max": 1024,
|
164
|
+
"value": 16,
|
165
|
+
},
|
166
|
+
)
|
167
|
+
|
168
|
+
|
118
169
|
CaseConfigParamInput_EFConstruction_PgVectoRS = CaseConfigInput(
|
119
170
|
label=CaseConfigParamType.EFConstruction,
|
120
171
|
inputType=InputType.Number,
|
@@ -127,6 +178,19 @@ CaseConfigParamInput_EFConstruction_PgVectoRS = CaseConfigInput(
|
|
127
178
|
== IndexType.HNSW.value,
|
128
179
|
)
|
129
180
|
|
181
|
+
CaseConfigParamInput_EFConstruction_PgVector = CaseConfigInput(
|
182
|
+
label=CaseConfigParamType.ef_construction,
|
183
|
+
inputType=InputType.Number,
|
184
|
+
inputConfig={
|
185
|
+
"min": 8,
|
186
|
+
"max": 1024,
|
187
|
+
"value": 256,
|
188
|
+
},
|
189
|
+
isDisplayed=lambda config: config[CaseConfigParamType.IndexType]
|
190
|
+
== IndexType.HNSW.value,
|
191
|
+
)
|
192
|
+
|
193
|
+
|
130
194
|
CaseConfigParamInput_M_ES = CaseConfigInput(
|
131
195
|
label=CaseConfigParamType.M,
|
132
196
|
inputType=InputType.Number,
|
@@ -379,6 +443,8 @@ CaseConfigParamInput_Lists = CaseConfigInput(
|
|
379
443
|
"max": 65536,
|
380
444
|
"value": 10,
|
381
445
|
},
|
446
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
447
|
+
in [IndexType.IVFFlat.value],
|
382
448
|
)
|
383
449
|
|
384
450
|
CaseConfigParamInput_Probes = CaseConfigInput(
|
@@ -391,6 +457,42 @@ CaseConfigParamInput_Probes = CaseConfigInput(
|
|
391
457
|
},
|
392
458
|
)
|
393
459
|
|
460
|
+
CaseConfigParamInput_Lists_PgVector = CaseConfigInput(
|
461
|
+
label=CaseConfigParamType.lists,
|
462
|
+
inputType=InputType.Number,
|
463
|
+
inputConfig={
|
464
|
+
"min": 1,
|
465
|
+
"max": 65536,
|
466
|
+
"value": 10,
|
467
|
+
},
|
468
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
469
|
+
== IndexType.IVFFlat.value,
|
470
|
+
)
|
471
|
+
|
472
|
+
CaseConfigParamInput_Probes_PgVector = CaseConfigInput(
|
473
|
+
label=CaseConfigParamType.probes,
|
474
|
+
inputType=InputType.Number,
|
475
|
+
inputConfig={
|
476
|
+
"min": 1,
|
477
|
+
"max": 65536,
|
478
|
+
"value": 1,
|
479
|
+
},
|
480
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
481
|
+
== IndexType.IVFFlat.value,
|
482
|
+
)
|
483
|
+
|
484
|
+
CaseConfigParamInput_EFSearch_PgVector = CaseConfigInput(
|
485
|
+
label=CaseConfigParamType.ef_search,
|
486
|
+
inputType=InputType.Number,
|
487
|
+
inputConfig={
|
488
|
+
"min": 1,
|
489
|
+
"max": 2048,
|
490
|
+
"value": 256,
|
491
|
+
},
|
492
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
493
|
+
== IndexType.HNSW.value,
|
494
|
+
)
|
495
|
+
|
394
496
|
CaseConfigParamInput_QuantizationType_PgVectoRS = CaseConfigInput(
|
395
497
|
label=CaseConfigParamType.quantizationType,
|
396
498
|
inputType=InputType.Option,
|
@@ -479,8 +581,22 @@ ESPerformanceConfig = [
|
|
479
581
|
CaseConfigParamInput_NumCandidates_ES,
|
480
582
|
]
|
481
583
|
|
482
|
-
PgVectorLoadingConfig = [
|
483
|
-
|
584
|
+
PgVectorLoadingConfig = [CaseConfigParamInput_IndexType_PgVector,
|
585
|
+
CaseConfigParamInput_Lists_PgVector,
|
586
|
+
CaseConfigParamInput_m,
|
587
|
+
CaseConfigParamInput_EFConstruction_PgVector,
|
588
|
+
CaseConfigParamInput_maintenance_work_mem_PgVector,
|
589
|
+
CaseConfigParamInput_max_parallel_workers_PgVector,
|
590
|
+
]
|
591
|
+
PgVectorPerformanceConfig = [CaseConfigParamInput_IndexType_PgVector,
|
592
|
+
CaseConfigParamInput_m,
|
593
|
+
CaseConfigParamInput_EFConstruction_PgVector,
|
594
|
+
CaseConfigParamInput_EFSearch_PgVector,
|
595
|
+
CaseConfigParamInput_Lists_PgVector,
|
596
|
+
CaseConfigParamInput_Probes_PgVector,
|
597
|
+
CaseConfigParamInput_maintenance_work_mem_PgVector,
|
598
|
+
CaseConfigParamInput_max_parallel_workers_PgVector,
|
599
|
+
]
|
484
600
|
|
485
601
|
PgVectoRSLoadingConfig = [
|
486
602
|
CaseConfigParamInput_IndexType,
|
vectordb_bench/models.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
import logging
|
2
2
|
import pathlib
|
3
3
|
from datetime import date
|
4
|
-
from
|
5
|
-
from
|
4
|
+
from enum import Enum, StrEnum, auto
|
5
|
+
from typing import List, Self, Sequence, Set
|
6
6
|
|
7
7
|
import ujson
|
8
8
|
|
@@ -37,8 +37,10 @@ class CaseConfigParamType(Enum):
|
|
37
37
|
IndexType = "IndexType"
|
38
38
|
M = "M"
|
39
39
|
EFConstruction = "efConstruction"
|
40
|
+
ef_construction = "ef_construction"
|
40
41
|
EF = "ef"
|
41
42
|
SearchList = "search_list"
|
43
|
+
ef_search = "ef_search"
|
42
44
|
Nlist = "nlist"
|
43
45
|
Nprobe = "nprobe"
|
44
46
|
MaxConnections = "maxConnections"
|
@@ -60,7 +62,8 @@ class CaseConfigParamType(Enum):
|
|
60
62
|
cache_dataset_on_device = "cache_dataset_on_device"
|
61
63
|
refine_ratio = "refine_ratio"
|
62
64
|
level = "level"
|
63
|
-
|
65
|
+
maintenance_work_mem = "maintenance_work_mem"
|
66
|
+
max_parallel_workers = "max_parallel_workers"
|
64
67
|
|
65
68
|
class CustomizedCase(BaseModel):
|
66
69
|
pass
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.8
|
3
|
+
Version: 0.0.9
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -12,7 +12,7 @@ Description-Content-Type: text/markdown
|
|
12
12
|
License-File: LICENSE
|
13
13
|
Requires-Dist: pytz
|
14
14
|
Requires-Dist: streamlit-autorefresh
|
15
|
-
Requires-Dist: streamlit
|
15
|
+
Requires-Dist: streamlit !=1.34.0
|
16
16
|
Requires-Dist: streamlit-extras
|
17
17
|
Requires-Dist: tqdm
|
18
18
|
Requires-Dist: s3fs
|
@@ -36,6 +36,7 @@ Requires-Dist: sqlalchemy ; extra == 'all'
|
|
36
36
|
Requires-Dist: redis ; extra == 'all'
|
37
37
|
Requires-Dist: chromadb ; extra == 'all'
|
38
38
|
Requires-Dist: psycopg2 ; extra == 'all'
|
39
|
+
Requires-Dist: psycopg ; extra == 'all'
|
39
40
|
Provides-Extra: chromadb
|
40
41
|
Requires-Dist: chromadb ; extra == 'chromadb'
|
41
42
|
Provides-Extra: elastic
|
@@ -44,7 +45,7 @@ Provides-Extra: pgvecto_rs
|
|
44
45
|
Requires-Dist: psycopg2 ; extra == 'pgvecto_rs'
|
45
46
|
Provides-Extra: pgvector
|
46
47
|
Requires-Dist: pgvector ; extra == 'pgvector'
|
47
|
-
Requires-Dist:
|
48
|
+
Requires-Dist: psycopg ; extra == 'pgvector'
|
48
49
|
Provides-Extra: pinecone
|
49
50
|
Requires-Dist: pinecone-client ; extra == 'pinecone'
|
50
51
|
Provides-Extra: qdrant
|
@@ -4,27 +4,27 @@ vectordb_bench/base.py,sha256=d34WCGXZI1u5RGQtqrPHd3HbOF5AmioFrM2j30Aj1sY,130
|
|
4
4
|
vectordb_bench/interface.py,sha256=7-nIr9oSDRfkbFDLJW0iZHCa7ItFG9O2TQAS7wHvWfk,9645
|
5
5
|
vectordb_bench/log_util.py,sha256=nMnW-sN24WyURcI07t-WA3q2N5R-YIvFgboRsSrNJDg,2906
|
6
6
|
vectordb_bench/metric.py,sha256=-SAcUm2m0OkHcph2QZusx-wZh8wCTrrHMy1Kv0WWL2w,1332
|
7
|
-
vectordb_bench/models.py,sha256=
|
7
|
+
vectordb_bench/models.py,sha256=MOxNLVaU-5GZ-2_X2fCNEyLEKVCLuqNQJNLgjzDOvYI,8755
|
8
8
|
vectordb_bench/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
9
|
vectordb_bench/backend/assembler.py,sha256=W03o8xaOoa5CTBr5586nIzm4sEJ4a-85sfcEG-d60VY,2000
|
10
10
|
vectordb_bench/backend/cases.py,sha256=3Ektg8VzU9FESgkwoh0tz6JYDIjhGarnKhIcaMczu5k,13570
|
11
11
|
vectordb_bench/backend/data_source.py,sha256=j4-eD0nIe7Y6fSM5WKEij3GfhyU_YOQ3L5Tyl-1GxX0,5446
|
12
12
|
vectordb_bench/backend/dataset.py,sha256=E-ZdYNXwCN3Fa4b_9rvhbiJgPLiKXQmi_fqZk0r7AHk,8295
|
13
13
|
vectordb_bench/backend/result_collector.py,sha256=jdQf5-q1z5y07SKy9Sig1wFROmm-p9x_Y81fId0sjaU,807
|
14
|
-
vectordb_bench/backend/task_runner.py,sha256=
|
14
|
+
vectordb_bench/backend/task_runner.py,sha256=LLfnsIDDXGTbHKtQlsjWWJop6ouFEFMQuZsqurLDkMI,9526
|
15
15
|
vectordb_bench/backend/utils.py,sha256=2UixYyfKvl8zRiashywB1l6hTI3jMtiZhiVm_bXHV1Y,1811
|
16
16
|
vectordb_bench/backend/clients/__init__.py,sha256=vYN2PZo1-zeO2kN-WRndxo2-BzmpOTLSGw8nUXTXvxI,4582
|
17
|
-
vectordb_bench/backend/clients/api.py,sha256=
|
17
|
+
vectordb_bench/backend/clients/api.py,sha256=U4jdrwkZpiOvvOc5BSylbnvc1n-O1bn5ucYIrGIKBuM,5640
|
18
18
|
vectordb_bench/backend/clients/chroma/chroma.py,sha256=Rg-GVWSDLdw32XfltJQlS3JHtNX1BJYDHxTSy086tKA,3739
|
19
19
|
vectordb_bench/backend/clients/chroma/config.py,sha256=7Tp_di0cdBsh4kX-IijTLsmFK2JJpcrXP2K6e24OUGc,345
|
20
20
|
vectordb_bench/backend/clients/elastic_cloud/config.py,sha256=xkaBNtsayByelVLda8LiSEwxjQjESpijJ8IFOh03f_0,1598
|
21
21
|
vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py,sha256=rWHthqGEpYwwka-0bsjyWfCwTAsYKNPvB17qe0Z1VDQ,5709
|
22
22
|
vectordb_bench/backend/clients/milvus/config.py,sha256=Usx-fpNsjHYZ1s-cL8qh9RXtCrh43wOs0jmdkswr8Ac,6677
|
23
|
-
vectordb_bench/backend/clients/milvus/milvus.py,sha256=
|
23
|
+
vectordb_bench/backend/clients/milvus/milvus.py,sha256=XFihIefPF7SRG7rENXg72_FZNB4zV0Hde_aNc_zOS9U,7138
|
24
24
|
vectordb_bench/backend/clients/pgvecto_rs/config.py,sha256=scdEXN6RT4yGA5j8fXSAooAvB550WQQ1JnN7SBQCUZM,3648
|
25
25
|
vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py,sha256=Od9g1wIhgslXBavEwCV8-LYsclqOJB3OwpeU6ZA265k,6195
|
26
|
-
vectordb_bench/backend/clients/pgvector/config.py,sha256=
|
27
|
-
vectordb_bench/backend/clients/pgvector/pgvector.py,sha256=
|
26
|
+
vectordb_bench/backend/clients/pgvector/config.py,sha256=p4Dlp9PXXSCY3gtSTzZzQO8-PAl7kixp_wID9Nu4CNs,7206
|
27
|
+
vectordb_bench/backend/clients/pgvector/pgvector.py,sha256=MAGmnMmP_LYRYs3zVjHezeOBEod1XiOuS45nqWtOgvo,12496
|
28
28
|
vectordb_bench/backend/clients/pinecone/config.py,sha256=4WvMu-9zxgoGfP5GPb7hpW-PRYEORADhlQvMa8JJh8k,384
|
29
29
|
vectordb_bench/backend/clients/pinecone/pinecone.py,sha256=U31QbXLuTcNPp7PK24glE6LM23-YpbxK_Kj-NmEwoZY,4078
|
30
30
|
vectordb_bench/backend/clients/qdrant_cloud/config.py,sha256=jk6gLcjZnjV0kQlc4RrrcXyekF6qkwzgWOYD3Mm8AOU,1385
|
@@ -37,7 +37,7 @@ vectordb_bench/backend/clients/zilliz_cloud/config.py,sha256=3Tk7X4r0n2SLzan110x
|
|
37
37
|
vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py,sha256=4JcwiVEJcdEykW6n471nfHeIlmhIDa-gOZ7G5H_4krY,681
|
38
38
|
vectordb_bench/backend/runner/__init__.py,sha256=5dZfPky8pY9Bi9HD5GZ3Fge8V2FJWrkGkQUkNL2v1t0,230
|
39
39
|
vectordb_bench/backend/runner/mp_runner.py,sha256=NEyDeim_lRbSNBbseUHZQzt12DwLMibCD1N4od6zQe0,4809
|
40
|
-
vectordb_bench/backend/runner/serial_runner.py,sha256=
|
40
|
+
vectordb_bench/backend/runner/serial_runner.py,sha256=LVmhv2qIuCU_v85CrWWiKKVfZUx5wmB0__HbO8HeQgo,9168
|
41
41
|
vectordb_bench/frontend/utils.py,sha256=BzKR1kMX1ErlXAzkFUb06O2mIcxBbquRzJtxoHgRnKs,162
|
42
42
|
vectordb_bench/frontend/vdb_benchmark.py,sha256=z9VCsMp2Ra43J-oFXRsX98Ww3Xq_koUpZ9PEZoU5qws,1649
|
43
43
|
vectordb_bench/frontend/components/check_results/charts.py,sha256=zbtEyUSk2FJbSlaGw1LH4boSljFXfhVZlC8rrIgVw_4,5113
|
@@ -51,13 +51,13 @@ vectordb_bench/frontend/components/check_results/priceTable.py,sha256=E7sxhSCjkB
|
|
51
51
|
vectordb_bench/frontend/components/check_results/stPageConfig.py,sha256=rAL2prWx0hT7Q3QWz6ALyKUMNladX6U48GlKvVq3DFA,429
|
52
52
|
vectordb_bench/frontend/components/get_results/saveAsImage.py,sha256=MdQCqjrX5rQyK34XfTkVykVLOcOouIz4enMR1P5GBiY,1457
|
53
53
|
vectordb_bench/frontend/components/run_test/autoRefresh.py,sha256=ofsl2sdmBd2y9O_xaJDr58NPycJsDwCdf2rEyE_f6e8,288
|
54
|
-
vectordb_bench/frontend/components/run_test/caseSelector.py,sha256=
|
54
|
+
vectordb_bench/frontend/components/run_test/caseSelector.py,sha256=B1rtbSDlzPHbdPK52mxFf3FbF4qYs1J9YNYdSnTxCRg,3945
|
55
55
|
vectordb_bench/frontend/components/run_test/dbConfigSetting.py,sha256=hoelDzXP-J2EmzvgGh6Euk7uBfu9iw0YGM7lxjo0cb8,2074
|
56
56
|
vectordb_bench/frontend/components/run_test/dbSelector.py,sha256=r8ABYpu6aWA94FiQxB0g75n7jU_mOFY5nYcHkKQ8e8g,1242
|
57
57
|
vectordb_bench/frontend/components/run_test/generateTasks.py,sha256=9r1vb03FMSJ_vG4px_wHMKMB_RWaKv6ttv0FptsytgA,812
|
58
58
|
vectordb_bench/frontend/components/run_test/hideSidebar.py,sha256=vb5kzIMmbMqWX67qFEHek21X4sGO_tPyn_uPqUEtp3Q,234
|
59
59
|
vectordb_bench/frontend/components/run_test/submitTask.py,sha256=4W-wvrnwPZjEAyHjeLtahLE8-Ekjw2wYNX5aAXZbark,3050
|
60
|
-
vectordb_bench/frontend/const/dbCaseConfigs.py,sha256=
|
60
|
+
vectordb_bench/frontend/const/dbCaseConfigs.py,sha256=dxKTrLeKXoizTu7dKxOZjH4ZdhHr-YuDYp6Xvbe652A,18345
|
61
61
|
vectordb_bench/frontend/const/dbPrices.py,sha256=10aBKjVcEg8y7TPSda28opmBM1KmXNrvbU9WM_BsZcE,176
|
62
62
|
vectordb_bench/frontend/const/styles.py,sha256=B2ycRJ6CHBzew8B2P7z9nnzY-20W5SlggXLI2olfxac,2174
|
63
63
|
vectordb_bench/frontend/pages/quries_per_dollar.py,sha256=SpXwKwdarwPz7RtF_qxyODfwARBb3VI9iKElYtnwEVs,2422
|
@@ -80,9 +80,9 @@ vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json
|
|
80
80
|
vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json,sha256=wzrlCEsqaoy4EujDNeLebCKZIC__aXNe2NhFDEdewKo,17398
|
81
81
|
vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json,sha256=G44g4aTJfeC0FyqosPEtaC-iy8JUX-bVpnA6dn0iiYU,14969
|
82
82
|
vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json,sha256=5R5PGJheoCOksx9uOXeSu8Z24Zc6Xp9LUkgJ-OzGAtM,41007
|
83
|
-
vectordb_bench-0.0.
|
84
|
-
vectordb_bench-0.0.
|
85
|
-
vectordb_bench-0.0.
|
86
|
-
vectordb_bench-0.0.
|
87
|
-
vectordb_bench-0.0.
|
88
|
-
vectordb_bench-0.0.
|
83
|
+
vectordb_bench-0.0.9.dist-info/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
|
84
|
+
vectordb_bench-0.0.9.dist-info/METADATA,sha256=VYoxjLKMXlz7VDWTz8y5l4wwVk8EDWK15FG-zlhEwiM,22681
|
85
|
+
vectordb_bench-0.0.9.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
86
|
+
vectordb_bench-0.0.9.dist-info/entry_points.txt,sha256=RVG6ppvzIsstAM199pDqeKu8lnxntjwYapn0smVQY7A,60
|
87
|
+
vectordb_bench-0.0.9.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
|
88
|
+
vectordb_bench-0.0.9.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|