vectordb-bench 0.0.21__py3-none-any.whl → 0.0.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/backend/clients/__init__.py +48 -0
- vectordb_bench/backend/clients/api.py +1 -0
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +53 -4
- vectordb_bench/backend/clients/aws_opensearch/cli.py +85 -1
- vectordb_bench/backend/clients/aws_opensearch/config.py +10 -0
- vectordb_bench/backend/clients/mariadb/cli.py +107 -0
- vectordb_bench/backend/clients/mariadb/config.py +71 -0
- vectordb_bench/backend/clients/mariadb/mariadb.py +214 -0
- vectordb_bench/backend/clients/milvus/cli.py +50 -0
- vectordb_bench/backend/clients/milvus/config.py +33 -0
- vectordb_bench/backend/clients/mongodb/config.py +53 -0
- vectordb_bench/backend/clients/mongodb/mongodb.py +200 -0
- vectordb_bench/backend/clients/pgvector/cli.py +13 -1
- vectordb_bench/backend/clients/pgvector/config.py +22 -5
- vectordb_bench/backend/clients/pgvector/pgvector.py +62 -19
- vectordb_bench/backend/clients/tidb/cli.py +98 -0
- vectordb_bench/backend/clients/tidb/config.py +49 -0
- vectordb_bench/backend/clients/tidb/tidb.py +234 -0
- vectordb_bench/cli/vectordbbench.py +4 -0
- vectordb_bench/frontend/components/custom/displaypPrams.py +12 -1
- vectordb_bench/frontend/components/run_test/submitTask.py +20 -3
- vectordb_bench/frontend/config/dbCaseConfigs.py +128 -0
- vectordb_bench/frontend/config/styles.py +2 -0
- vectordb_bench/log_util.py +15 -2
- vectordb_bench/models.py +7 -0
- {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.23.dist-info}/METADATA +67 -3
- {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.23.dist-info}/RECORD +31 -23
- {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.23.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.23.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.23.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.23.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,98 @@
|
|
1
|
+
from typing import Annotated, Unpack
|
2
|
+
|
3
|
+
import click
|
4
|
+
from pydantic import SecretStr
|
5
|
+
|
6
|
+
from vectordb_bench.backend.clients import DB
|
7
|
+
|
8
|
+
from ....cli.cli import CommonTypedDict, cli, click_parameter_decorators_from_typed_dict, run
|
9
|
+
|
10
|
+
|
11
|
+
class TiDBTypedDict(CommonTypedDict):
    """Command-line options used to connect to TiDB.

    Every field pairs a value type with the ``click.option`` that supplies it.

    NOTE(review): the first field is declared as ``user_name`` while its flag
    is ``--username`` and the ``TiDB`` command reads ``parameters["username"]``;
    confirm which name static type checkers are expected to see.
    """

    # Account used to log in.
    user_name: Annotated[
        str,
        click.option(
            "--username",
            type=str,
            default="root",
            show_default=True,
            required=True,
            help="Username",
        ),
    ]
    # Account password; defaults to the empty string.
    password: Annotated[
        str,
        click.option(
            "--password",
            type=str,
            help="Password",
            default="",
            show_default=True,
        ),
    ]
    # Server address.
    host: Annotated[
        str,
        click.option(
            "--host",
            type=str,
            help="Db host",
            default="127.0.0.1",
            show_default=True,
            required=True,
        ),
    ]
    # Server port.
    port: Annotated[
        int,
        click.option(
            "--port",
            type=int,
            help="Db Port",
            default=4000,
            show_default=True,
            required=True,
        ),
    ]
    # Database (schema) to operate on.
    db_name: Annotated[
        str,
        click.option(
            "--db-name",
            type=str,
            help="Db name",
            default="test",
            show_default=True,
            required=True,
        ),
    ]
    # On/off switch exposed as a --ssl/--no-ssl flag pair.
    ssl: Annotated[
        bool,
        click.option(
            "--ssl/--no-ssl",
            is_flag=True,
            help="Enable or disable SSL, for TiDB Serverless SSL must be enabled",
            default=False,
            show_default=True,
        ),
    ]
|
76
|
+
|
77
|
+
|
78
|
+
# CLI entry point for benchmarking TiDB. Documented with comments rather than a
# docstring so that the text click shows for this command stays unchanged.
@cli.command()
@click_parameter_decorators_from_typed_dict(TiDBTypedDict)
def TiDB(**parameters: Unpack[TiDBTypedDict]):
    # The config classes are imported inside the command body, not at module level.
    from .config import TiDBConfig, TiDBIndexConfig

    # Connection settings assembled from the parsed command-line options.
    connection_config = TiDBConfig(
        db_label=parameters["db_label"],
        user_name=parameters["username"],
        password=SecretStr(parameters["password"]),
        host=parameters["host"],
        port=parameters["port"],
        db_name=parameters["db_name"],
        ssl=parameters["ssl"],
    )
    case_config = TiDBIndexConfig()

    # All parsed options are forwarded to the shared runner as well.
    run(
        db=DB.TiDB,
        db_config=connection_config,
        db_case_config=case_config,
        **parameters,
    )
|
@@ -0,0 +1,49 @@
|
|
1
|
+
from pydantic import SecretStr, BaseModel, validator
|
2
|
+
from ..api import DBConfig, DBCaseConfig, MetricType
|
3
|
+
|
4
|
+
|
5
|
+
class TiDBConfig(DBConfig):
    """Connection settings for a TiDB server."""

    user_name: str = "root"
    password: SecretStr
    host: str = "127.0.0.1"
    port: int = 4000
    db_name: str = "test"
    ssl: bool = False

    @validator("*")
    def not_empty_field(cls, v: any, field: any):
        # Pass-through for every field: the value is returned unchanged.
        # NOTE(review): presumably this replaces a stricter base-class check so
        # that an empty password is accepted -- confirm against DBConfig.
        return v

    def to_dict(self) -> dict:
        """Return the settings as a keyword mapping for opening a connection.

        The password is unwrapped from its ``SecretStr`` and the single ``ssl``
        switch drives both certificate-verification entries.
        """
        verify = self.ssl
        return dict(
            host=self.host,
            port=self.port,
            user=self.user_name,
            password=self.password.get_secret_value(),
            database=self.db_name,
            ssl_verify_cert=verify,
            ssl_verify_identity=verify,
        )
|
28
|
+
|
29
|
+
|
30
|
+
class TiDBIndexConfig(BaseModel, DBCaseConfig):
    """Case configuration that maps the benchmark metric to a TiDB distance function."""

    metric_type: MetricType | None = None

    def get_metric_fn(self) -> str:
        """Return the SQL distance function name for ``metric_type``.

        Raises:
            ValueError: if the metric is neither L2 nor COSINE (including ``None``).
        """
        supported = (
            (MetricType.L2, "vec_l2_distance"),
            (MetricType.COSINE, "vec_cosine_distance"),
        )
        for candidate, function_name in supported:
            if self.metric_type == candidate:
                return function_name
        raise ValueError(f"Unsupported metric type: {self.metric_type}")

    def index_param(self) -> dict:
        """Return the parameters used when creating the vector index."""
        metric_fn = self.get_metric_fn()
        return {"metric_fn": metric_fn}

    def search_param(self) -> dict:
        """Return the parameters used when searching; same content as ``index_param``."""
        metric_fn = self.get_metric_fn()
        return {"metric_fn": metric_fn}
|
@@ -0,0 +1,234 @@
|
|
1
|
+
import concurrent.futures
|
2
|
+
import io
|
3
|
+
import logging
|
4
|
+
import time
|
5
|
+
from contextlib import contextmanager
|
6
|
+
from typing import Any, Optional, Tuple
|
7
|
+
|
8
|
+
import pymysql
|
9
|
+
|
10
|
+
from ..api import VectorDB
|
11
|
+
from .config import TiDBIndexConfig
|
12
|
+
|
13
|
+
log = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
class TiDB(VectorDB):
    """VectorDB client that stores and searches embeddings in a TiDB table via PyMySQL."""

    def __init__(
        self,
        dim: int,
        db_config: dict,
        db_case_config: TiDBIndexConfig,
        collection_name: str = "vector_bench_test",
        drop_old: bool = False,
        **kwargs,
    ):
        """Record the settings and, when ``drop_old`` is set, recreate the table.

        Args:
            dim: Dimension of the stored vectors.
            db_config: Keyword arguments passed to ``pymysql.connect``.
            db_case_config: Supplies the distance function for index and search.
            collection_name: Name of the table to use.
            drop_old: Drop any existing table and create a fresh one.
            **kwargs: Accepted and ignored.
        """
        self.name = "TiDB"
        self.db_config = db_config
        self.case_config = db_case_config
        self.table_name = collection_name
        self.dim = dim
        self.conn = None  # To be inited by init()
        self.cursor = None  # To be inited by init()

        self.search_fn = db_case_config.search_param()["metric_fn"]

        if drop_old:
            self._drop_table()
            self._create_table()

    @contextmanager
    def init(self):
        """Hold one connection and cursor on ``self`` for the duration of the context."""
        with self._get_connection() as (conn, cursor):
            self.conn = conn
            self.cursor = cursor
            try:
                yield
            finally:
                self.conn = None
                self.cursor = None

    @contextmanager
    def _get_connection(self):
        """Open a fresh connection and yield ``(connection, cursor)``."""
        with pymysql.connect(**self.db_config) as conn:
            # PyMySQL exposes autocommit as a method; assigning to the attribute
            # would only shadow that method without changing the session.
            conn.autocommit(False)
            with conn.cursor() as cursor:
                yield conn, cursor

    def _drop_table(self):
        """Drop the benchmark table if it exists; log and re-raise on failure."""
        try:
            with self._get_connection() as (conn, cursor):
                cursor.execute(f"DROP TABLE IF EXISTS {self.table_name}")
                conn.commit()
        except Exception as e:
            log.warning("Failed to drop table: %s error: %s", self.table_name, e)
            raise

    def _create_table(self):
        """Create the table with an id, a vector column and a vector index."""
        try:
            index_param = self.case_config.index_param()
            with self._get_connection() as (conn, cursor):
                cursor.execute(
                    f"""
                    CREATE TABLE {self.table_name} (
                        id BIGINT PRIMARY KEY,
                        embedding VECTOR({self.dim}) NOT NULL,
                        VECTOR INDEX (({index_param["metric_fn"]}(embedding)))
                    );
                    """
                )
                conn.commit()
        except Exception as e:
            log.warning("Failed to create table: %s error: %s", self.table_name, e)
            raise

    def ready_to_load(self) -> bool:
        """No preparation is performed; returns ``None`` as before."""
        pass

    def optimize(self, data_size: int | None = None) -> None:
        """Block until TiFlash has replicated, compacted and indexed the table.

        Steps: poll the replica progress until it equals 1, run a count query
        with the TiFlash read engine enabled, compact the table, then poll the
        not-yet-indexed row count until it is no longer positive.
        """
        while True:
            progress = self._optimize_check_tiflash_replica_progress()
            if progress == 1:
                break
            # The progress is a fraction, so "%d" would always print 0 here.
            log.info("Data replication not ready, progress: %s", progress)
            time.sleep(2)

        log.info("Waiting TiFlash to catch up...")
        self._optimize_wait_tiflash_catch_up()

        log.info("Start compacting TiFlash replica...")
        self._optimize_compact_tiflash()

        log.info("Waiting index build to finish...")
        log_reduce_seq = 0
        while True:
            pending_rows = self._optimize_get_tiflash_index_pending_rows()
            if pending_rows > 0:
                # Only every 15th poll is logged to keep the output short.
                if log_reduce_seq % 15 == 0:
                    log.info("Index not fully built, pending rows: %d", pending_rows)
                log_reduce_seq += 1
                time.sleep(2)
            else:
                break

        log.info("Index build finished successfully.")

    def _optimize_check_tiflash_replica_progress(self):
        """Return the PROGRESS value of this table's TiFlash replica."""
        try:
            database = self.db_config["database"]
            with self._get_connection() as (_, cursor):
                cursor.execute(
                    f"""
                    SELECT PROGRESS FROM information_schema.tiflash_replica
                    WHERE TABLE_SCHEMA = "{database}" AND TABLE_NAME = "{self.table_name}"
                    """
                )
                result = cursor.fetchone()
                return result[0]
        except Exception as e:
            log.warning("Failed to check TiFlash replica progress: %s", e)
            raise

    def _optimize_wait_tiflash_catch_up(self):
        """Run ``COUNT(*)`` with the tidb and tiflash read engines enabled; return the count."""
        try:
            with self._get_connection() as (conn, cursor):
                cursor.execute('SET @@TIDB_ISOLATION_READ_ENGINES="tidb,tiflash"')
                conn.commit()
                cursor.execute(f"SELECT COUNT(*) FROM {self.table_name}")
                result = cursor.fetchone()
                return result[0]
        except Exception as e:
            log.warning("Failed to wait TiFlash to catch up: %s", e)
            raise

    def _optimize_compact_tiflash(self):
        """Issue ``ALTER TABLE ... COMPACT`` for the benchmark table."""
        try:
            with self._get_connection() as (conn, cursor):
                cursor.execute(f"ALTER TABLE {self.table_name} COMPACT")
                conn.commit()
        except Exception as e:
            log.warning("Failed to compact table: %s", e)
            raise

    def _optimize_get_tiflash_index_pending_rows(self):
        """Return the summed ROWS_STABLE_NOT_INDEXED value for this table."""
        try:
            database = self.db_config["database"]
            with self._get_connection() as (_, cursor):
                cursor.execute(
                    f"""
                    SELECT SUM(ROWS_STABLE_NOT_INDEXED)
                    FROM information_schema.tiflash_indexes
                    WHERE TIDB_DATABASE = "{database}" AND TIDB_TABLE = "{self.table_name}"
                    """
                )
                result = cursor.fetchone()
                # NOTE(review): SUM yields NULL when no row matches, which the
                # caller then compares with 0 -- confirm rows always exist here.
                return result[0]
        except Exception as e:
            log.warning("Failed to read TiFlash index pending rows: %s", e)
            raise

    def _insert_embeddings_serial(
        self,
        embeddings: list[list[float]],
        metadata: list[int],
        offset: int,
        size: int,
    ) -> None:
        """Insert rows ``offset`` .. ``offset + size - 1`` with one INSERT statement.

        A dedicated connection is opened per call, so calls may run on
        separate worker threads. Failures are logged and re-raised.
        """
        try:
            with self._get_connection() as (conn, cursor):
                rows = ",".join(
                    f'({metadata[i]}, "{str(embeddings[i])}")' for i in range(offset, offset + size)
                )
                cursor.execute(f"INSERT INTO {self.table_name} (id, embedding) VALUES {rows}")
                conn.commit()
        except Exception as e:
            log.warning("Failed to insert data into table: %s", e)
            raise

    def insert_embeddings(
        self,
        embeddings: list[list[float]],
        metadata: list[int],
        **kwargs: Any,
    ) -> Tuple[int, Optional[Exception]]:
        """Insert the embeddings in parallel batches.

        Args:
            embeddings: Vectors to store.
            metadata: Row ids, aligned with ``embeddings`` by position.
            **kwargs: Accepted and ignored.

        Returns:
            ``(len(metadata), None)`` on success.

        Raises:
            Exception: whatever a failed batch raised; batches that have not
                started yet are cancelled first.
        """
        total = len(embeddings)
        if total == 0:
            # Nothing to send; also avoids building a zero-step range below.
            return 0, None

        workers = 10
        # Avoid exceeding MAX_ALLOWED_PACKET (default=64MB)
        # NOTE(review): 24 looks like a per-value text size estimate -- confirm.
        max_batch_size = 64 * 1024 * 1024 // 24 // self.dim
        # Clamp to at least 1: fewer rows than workers used to give a batch
        # size of 0 and a ValueError from range().
        batch_size = max(1, min(total // workers, max_batch_size))

        with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
            futures = [
                executor.submit(
                    self._insert_embeddings_serial,
                    embeddings,
                    metadata,
                    offset,
                    min(batch_size, total - offset),
                )
                for offset in range(0, total, batch_size)
            ]
            done, pending = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_EXCEPTION)
            # Cancel queued batches before re-raising, otherwise leaving the
            # executor context would still run every one of them.
            for future in pending:
                future.cancel()
            for future in done:
                future.result()
        return len(metadata), None

    def search_embedding(
        self,
        query: list[float],
        k: int = 100,
        filters: dict | None = None,
        timeout: int | None = None,
        **kwargs: Any,
    ) -> list[int]:
        """Return the ids of the ``k`` rows ordered by distance to ``query``.

        Uses the cursor stored by ``init()``; ``filters`` and ``timeout`` are
        accepted but not used.
        """
        self.cursor.execute(
            f"""
            SELECT id FROM {self.table_name}
            ORDER BY {self.search_fn}(embedding, "{str(query)}") LIMIT {k};
            """
        )
        result = self.cursor.fetchall()
        return [int(i[0]) for i in result]
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from ..backend.clients.alloydb.cli import AlloyDBScaNN
|
2
2
|
from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
|
3
|
+
from ..backend.clients.mariadb.cli import MariaDBHNSW
|
3
4
|
from ..backend.clients.memorydb.cli import MemoryDB
|
4
5
|
from ..backend.clients.milvus.cli import MilvusAutoIndex
|
5
6
|
from ..backend.clients.pgdiskann.cli import PgDiskAnn
|
@@ -10,6 +11,7 @@ from ..backend.clients.redis.cli import Redis
|
|
10
11
|
from ..backend.clients.test.cli import Test
|
11
12
|
from ..backend.clients.weaviate_cloud.cli import Weaviate
|
12
13
|
from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
|
14
|
+
from ..backend.clients.tidb.cli import TiDB
|
13
15
|
from .cli import cli
|
14
16
|
|
15
17
|
cli.add_command(PgVectorHNSW)
|
@@ -25,6 +27,8 @@ cli.add_command(AWSOpenSearch)
|
|
25
27
|
cli.add_command(PgVectorScaleDiskAnn)
|
26
28
|
cli.add_command(PgDiskAnn)
|
27
29
|
cli.add_command(AlloyDBScaNN)
|
30
|
+
cli.add_command(MariaDBHNSW)
|
31
|
+
cli.add_command(TiDB)
|
28
32
|
|
29
33
|
|
30
34
|
if __name__ == "__main__":
|
@@ -3,7 +3,7 @@ def displayParams(st):
|
|
3
3
|
"""
|
4
4
|
- `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
|
5
5
|
- Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
6
|
-
- Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
6
|
+
- Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
7
7
|
- Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
|
8
8
|
|
9
9
|
- `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
|
@@ -11,3 +11,14 @@ def displayParams(st):
|
|
11
11
|
- `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
|
12
12
|
"""
|
13
13
|
)
|
14
|
+
st.caption(
|
15
|
+
"""We recommend limiting the number of test query vectors, like 1,000.""",
|
16
|
+
help="""
|
17
|
+
When conducting concurrent query tests, Vdbbench creates a large number of processes.
|
18
|
+
To minimize additional communication overhead during testing,
|
19
|
+
we prepare a complete set of test queries for each process, allowing them to run independently.\n
|
20
|
+
However, this means that as the number of concurrent processes increases,
|
21
|
+
the number of copied query vectors also increases significantly,
|
22
|
+
which can place substantial pressure on memory resources.
|
23
|
+
""",
|
24
|
+
)
|
@@ -1,6 +1,8 @@
|
|
1
1
|
from datetime import datetime
|
2
|
+
from vectordb_bench import config
|
2
3
|
from vectordb_bench.frontend.config import styles
|
3
4
|
from vectordb_bench.interface import benchmark_runner
|
5
|
+
from vectordb_bench.models import TaskConfig
|
4
6
|
|
5
7
|
|
6
8
|
def submitTask(st, tasks, isAllValid):
|
@@ -47,16 +49,31 @@ def advancedSettings(st):
|
|
47
49
|
k = container[0].number_input("k", min_value=1, value=100, label_visibility="collapsed")
|
48
50
|
container[1].caption("K value for number of nearest neighbors to search")
|
49
51
|
|
50
|
-
|
52
|
+
container = st.columns([1, 2])
|
53
|
+
defaultconcurrentInput = ",".join(map(str, config.NUM_CONCURRENCY))
|
54
|
+
concurrentInput = container[0].text_input(
|
55
|
+
"Concurrent Input", value=defaultconcurrentInput, label_visibility="collapsed"
|
56
|
+
)
|
57
|
+
container[1].caption("num of concurrencies for search tests to get max-qps")
|
58
|
+
return index_already_exists, use_aliyun, k, concurrentInput
|
51
59
|
|
52
60
|
|
53
|
-
def controlPanel(st, tasks, taskLabel, isAllValid):
|
54
|
-
index_already_exists, use_aliyun, k = advancedSettings(st)
|
61
|
+
def controlPanel(st, tasks: list[TaskConfig], taskLabel, isAllValid):
|
62
|
+
index_already_exists, use_aliyun, k, concurrentInput = advancedSettings(st)
|
55
63
|
|
56
64
|
def runHandler():
|
57
65
|
benchmark_runner.set_drop_old(not index_already_exists)
|
66
|
+
|
67
|
+
try:
|
68
|
+
concurrentInput_list = [int(item.strip()) for item in concurrentInput.split(",")]
|
69
|
+
except ValueError:
|
70
|
+
st.write("please input correct number")
|
71
|
+
return None
|
72
|
+
|
58
73
|
for task in tasks:
|
59
74
|
task.case_config.k = k
|
75
|
+
task.case_config.concurrency_search_config.num_concurrency = concurrentInput_list
|
76
|
+
|
60
77
|
benchmark_runner.set_download_address(use_aliyun)
|
61
78
|
benchmark_runner.run(tasks, taskLabel)
|
62
79
|
|
@@ -173,6 +173,7 @@ CaseConfigParamInput_IndexType = CaseConfigInput(
|
|
173
173
|
IndexType.GPU_IVF_FLAT.value,
|
174
174
|
IndexType.GPU_IVF_PQ.value,
|
175
175
|
IndexType.GPU_CAGRA.value,
|
176
|
+
IndexType.GPU_BRUTE_FORCE.value,
|
176
177
|
],
|
177
178
|
},
|
178
179
|
)
|
@@ -562,6 +563,7 @@ CaseConfigParamInput_Nlist = CaseConfigInput(
|
|
562
563
|
IndexType.IVFSQ8.value,
|
563
564
|
IndexType.GPU_IVF_FLAT.value,
|
564
565
|
IndexType.GPU_IVF_PQ.value,
|
566
|
+
IndexType.GPU_BRUTE_FORCE.value,
|
565
567
|
],
|
566
568
|
)
|
567
569
|
|
@@ -579,6 +581,7 @@ CaseConfigParamInput_Nprobe = CaseConfigInput(
|
|
579
581
|
IndexType.IVFSQ8.value,
|
580
582
|
IndexType.GPU_IVF_FLAT.value,
|
581
583
|
IndexType.GPU_IVF_PQ.value,
|
584
|
+
IndexType.GPU_BRUTE_FORCE.value,
|
582
585
|
],
|
583
586
|
)
|
584
587
|
|
@@ -703,6 +706,7 @@ CaseConfigParamInput_cache_dataset_on_device = CaseConfigInput(
|
|
703
706
|
IndexType.GPU_CAGRA.value,
|
704
707
|
IndexType.GPU_IVF_PQ.value,
|
705
708
|
IndexType.GPU_IVF_FLAT.value,
|
709
|
+
IndexType.GPU_BRUTE_FORCE.value,
|
706
710
|
],
|
707
711
|
)
|
708
712
|
|
@@ -720,6 +724,7 @@ CaseConfigParamInput_refine_ratio = CaseConfigInput(
|
|
720
724
|
IndexType.GPU_CAGRA.value,
|
721
725
|
IndexType.GPU_IVF_PQ.value,
|
722
726
|
IndexType.GPU_IVF_FLAT.value,
|
727
|
+
IndexType.GPU_BRUTE_FORCE.value,
|
723
728
|
],
|
724
729
|
)
|
725
730
|
|
@@ -818,6 +823,19 @@ CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
|
|
818
823
|
],
|
819
824
|
)
|
820
825
|
|
826
|
+
# Option input for the table quantization type; displayed only while the
# selected index type is HNSW or IVFFlat.
CaseConfigParamInput_TableQuantizationType_PgVector = CaseConfigInput(
    label=CaseConfigParamType.tableQuantizationType,
    inputType=InputType.Option,
    inputConfig=dict(options=["none", "bit", "halfvec"]),
    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
    in (IndexType.HNSW.value, IndexType.IVFFlat.value),
)
|
838
|
+
|
821
839
|
CaseConfigParamInput_max_parallel_workers_PgVectorRS = CaseConfigInput(
|
822
840
|
label=CaseConfigParamType.max_parallel_workers,
|
823
841
|
displayLabel="Max parallel workers",
|
@@ -1040,6 +1058,84 @@ CaseConfigParamInput_NumCandidates_AliES = CaseConfigInput(
|
|
1040
1058
|
},
|
1041
1059
|
)
|
1042
1060
|
|
1061
|
+
# Index type selector for MariaDB; HNSW is the only choice offered.
CaseConfigParamInput_IndexType_MariaDB = CaseConfigInput(
    label=CaseConfigParamType.IndexType,
    inputType=InputType.Option,
    inputHelp="Select Index Type",
    inputConfig=dict(options=[IndexType.HNSW.value]),
)

# Storage engine selector for MariaDB.
CaseConfigParamInput_StorageEngine_MariaDB = CaseConfigInput(
    label=CaseConfigParamType.storage_engine,
    inputType=InputType.Option,
    inputHelp="Select Storage Engine",
    inputConfig=dict(options=["InnoDB", "MyISAM"]),
)

# The three numeric inputs below are displayed only while HNSW is selected.
CaseConfigParamInput_M_MariaDB = CaseConfigInput(
    label=CaseConfigParamType.M,
    inputType=InputType.Number,
    inputHelp="M parameter in MHNSW vector indexing",
    inputConfig=dict(min=3, max=200, value=6),
    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
)

CaseConfigParamInput_EFSearch_MariaDB = CaseConfigInput(
    label=CaseConfigParamType.ef_search,
    inputType=InputType.Number,
    inputHelp="mhnsw_ef_search",
    inputConfig=dict(min=1, max=10000, value=20),
    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
)

CaseConfigParamInput_CacheSize_MariaDB = CaseConfigInput(
    label=CaseConfigParamType.max_cache_size,
    inputType=InputType.Number,
    inputHelp="mhnsw_max_cache_size",
    inputConfig=dict(
        min=1 << 20,  # 1048576
        max=2**53 - 1,
        value=16 * (1 << 30),  # 16 * 1024 ** 3
    ),
    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
)
|
1119
|
+
|
1120
|
+
# Quantization selector for MongoDB.
CaseConfigParamInput_MongoDBQuantizationType = CaseConfigInput(
    label=CaseConfigParamType.mongodb_quantization_type,
    inputType=InputType.Option,
    inputConfig=dict(options=["none", "scalar", "binary"]),
)


# Numeric input for the MongoDB candidates ratio, limited to the range 10..20.
CaseConfigParamInput_MongoDBNumCandidatesRatio = CaseConfigInput(
    label=CaseConfigParamType.mongodb_num_candidates_ratio,
    inputType=InputType.Number,
    inputConfig=dict(min=10, max=20, value=10),
)
|
1138
|
+
|
1043
1139
|
|
1044
1140
|
MilvusLoadConfig = [
|
1045
1141
|
CaseConfigParamInput_IndexType,
|
@@ -1113,6 +1209,7 @@ PgVectorLoadingConfig = [
|
|
1113
1209
|
CaseConfigParamInput_m,
|
1114
1210
|
CaseConfigParamInput_EFConstruction_PgVector,
|
1115
1211
|
CaseConfigParamInput_QuantizationType_PgVector,
|
1212
|
+
CaseConfigParamInput_TableQuantizationType_PgVector,
|
1116
1213
|
CaseConfigParamInput_maintenance_work_mem_PgVector,
|
1117
1214
|
CaseConfigParamInput_max_parallel_workers_PgVector,
|
1118
1215
|
]
|
@@ -1124,6 +1221,7 @@ PgVectorPerformanceConfig = [
|
|
1124
1221
|
CaseConfigParamInput_Lists_PgVector,
|
1125
1222
|
CaseConfigParamInput_Probes_PgVector,
|
1126
1223
|
CaseConfigParamInput_QuantizationType_PgVector,
|
1224
|
+
CaseConfigParamInput_TableQuantizationType_PgVector,
|
1127
1225
|
CaseConfigParamInput_maintenance_work_mem_PgVector,
|
1128
1226
|
CaseConfigParamInput_max_parallel_workers_PgVector,
|
1129
1227
|
CaseConfigParamInput_reranking_PgVector,
|
@@ -1224,6 +1322,28 @@ AliyunElasticsearchPerformanceConfig = [
|
|
1224
1322
|
CaseConfigParamInput_NumCandidates_AliES,
|
1225
1323
|
]
|
1226
1324
|
|
1325
|
+
# Each performance list is its loading list followed by the extra input(s)
# that only matter for searching.
MongoDBLoadingConfig = [CaseConfigParamInput_MongoDBQuantizationType]
MongoDBPerformanceConfig = [
    *MongoDBLoadingConfig,
    CaseConfigParamInput_MongoDBNumCandidatesRatio,
]

MariaDBLoadingConfig = [
    CaseConfigParamInput_IndexType_MariaDB,
    CaseConfigParamInput_StorageEngine_MariaDB,
    CaseConfigParamInput_M_MariaDB,
    CaseConfigParamInput_CacheSize_MariaDB,
]
MariaDBPerformanceConfig = [
    *MariaDBLoadingConfig,
    CaseConfigParamInput_EFSearch_MariaDB,
]
|
1346
|
+
|
1227
1347
|
CASE_CONFIG_MAP = {
|
1228
1348
|
DB.Milvus: {
|
1229
1349
|
CaseLabel.Load: MilvusLoadConfig,
|
@@ -1272,4 +1392,12 @@ CASE_CONFIG_MAP = {
|
|
1272
1392
|
CaseLabel.Load: AliyunOpensearchLoadingConfig,
|
1273
1393
|
CaseLabel.Performance: AliyunOpenSearchPerformanceConfig,
|
1274
1394
|
},
|
1395
|
+
DB.MongoDB: {
|
1396
|
+
CaseLabel.Load: MongoDBLoadingConfig,
|
1397
|
+
CaseLabel.Performance: MongoDBPerformanceConfig,
|
1398
|
+
},
|
1399
|
+
DB.MariaDB: {
|
1400
|
+
CaseLabel.Load: MariaDBLoadingConfig,
|
1401
|
+
CaseLabel.Performance: MariaDBPerformanceConfig,
|
1402
|
+
},
|
1275
1403
|
}
|
@@ -47,6 +47,7 @@ DB_TO_ICON = {
|
|
47
47
|
DB.Redis: "https://assets.zilliz.com/Redis_Cloud_74b8bfef39.png",
|
48
48
|
DB.Chroma: "https://assets.zilliz.com/chroma_ceb3f06ed7.png",
|
49
49
|
DB.AWSOpenSearch: "https://assets.zilliz.com/opensearch_1eee37584e.jpeg",
|
50
|
+
DB.TiDB: "https://img2.pingcap.com/forms/3/d/3d7fd5f9767323d6f037795704211ac44b4923d6.png",
|
50
51
|
}
|
51
52
|
|
52
53
|
# RedisCloud color: #0D6EFD
|
@@ -61,4 +62,5 @@ COLOR_MAP = {
|
|
61
62
|
DB.PgVector.value: "#4C779A",
|
62
63
|
DB.Redis.value: "#0D6EFD",
|
63
64
|
DB.AWSOpenSearch.value: "#0DCAF0",
|
65
|
+
DB.TiDB.value: "#0D6EFD",
|
64
66
|
}
|