vectordb-bench 0.0.21__py3-none-any.whl → 0.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. vectordb_bench/backend/clients/__init__.py +48 -0
  2. vectordb_bench/backend/clients/api.py +1 -0
  3. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +53 -4
  4. vectordb_bench/backend/clients/aws_opensearch/cli.py +85 -1
  5. vectordb_bench/backend/clients/aws_opensearch/config.py +10 -0
  6. vectordb_bench/backend/clients/mariadb/cli.py +107 -0
  7. vectordb_bench/backend/clients/mariadb/config.py +71 -0
  8. vectordb_bench/backend/clients/mariadb/mariadb.py +214 -0
  9. vectordb_bench/backend/clients/milvus/cli.py +50 -0
  10. vectordb_bench/backend/clients/milvus/config.py +33 -0
  11. vectordb_bench/backend/clients/mongodb/config.py +53 -0
  12. vectordb_bench/backend/clients/mongodb/mongodb.py +200 -0
  13. vectordb_bench/backend/clients/pgvector/cli.py +13 -1
  14. vectordb_bench/backend/clients/pgvector/config.py +22 -5
  15. vectordb_bench/backend/clients/pgvector/pgvector.py +62 -19
  16. vectordb_bench/backend/clients/tidb/cli.py +98 -0
  17. vectordb_bench/backend/clients/tidb/config.py +49 -0
  18. vectordb_bench/backend/clients/tidb/tidb.py +234 -0
  19. vectordb_bench/cli/vectordbbench.py +4 -0
  20. vectordb_bench/frontend/components/custom/displaypPrams.py +12 -1
  21. vectordb_bench/frontend/components/run_test/submitTask.py +20 -3
  22. vectordb_bench/frontend/config/dbCaseConfigs.py +128 -0
  23. vectordb_bench/frontend/config/styles.py +2 -0
  24. vectordb_bench/log_util.py +15 -2
  25. vectordb_bench/models.py +7 -0
  26. {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.23.dist-info}/METADATA +67 -3
  27. {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.23.dist-info}/RECORD +31 -23
  28. {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.23.dist-info}/WHEEL +1 -1
  29. {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.23.dist-info}/LICENSE +0 -0
  30. {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.23.dist-info}/entry_points.txt +0 -0
  31. {vectordb_bench-0.0.21.dist-info → vectordb_bench-0.0.23.dist-info}/top_level.txt +0 -0
vectordb_bench/backend/clients/tidb/cli.py
@@ -0,0 +1,98 @@
+ from typing import Annotated, Unpack
+
+ import click
+ from pydantic import SecretStr
+
+ from vectordb_bench.backend.clients import DB
+
+ from ....cli.cli import CommonTypedDict, cli, click_parameter_decorators_from_typed_dict, run
+
+
+ class TiDBTypedDict(CommonTypedDict):
+     user_name: Annotated[
+         str,
+         click.option(
+             "--username",
+             type=str,
+             help="Username",
+             default="root",
+             show_default=True,
+             required=True,
+         ),
+     ]
+     password: Annotated[
+         str,
+         click.option(
+             "--password",
+             type=str,
+             default="",
+             show_default=True,
+             help="Password",
+         ),
+     ]
+     host: Annotated[
+         str,
+         click.option(
+             "--host",
+             type=str,
+             default="127.0.0.1",
+             show_default=True,
+             required=True,
+             help="Db host",
+         ),
+     ]
+     port: Annotated[
+         int,
+         click.option(
+             "--port",
+             type=int,
+             default=4000,
+             show_default=True,
+             required=True,
+             help="Db Port",
+         ),
+     ]
+     db_name: Annotated[
+         str,
+         click.option(
+             "--db-name",
+             type=str,
+             default="test",
+             show_default=True,
+             required=True,
+             help="Db name",
+         ),
+     ]
+     ssl: Annotated[
+         bool,
+         click.option(
+             "--ssl/--no-ssl",
+             default=False,
+             show_default=True,
+             is_flag=True,
+             help="Enable or disable SSL, for TiDB Serverless SSL must be enabled",
+         ),
+     ]
+
+
+ @cli.command()
+ @click_parameter_decorators_from_typed_dict(TiDBTypedDict)
+ def TiDB(
+     **parameters: Unpack[TiDBTypedDict],
+ ):
+     from .config import TiDBConfig, TiDBIndexConfig
+
+     run(
+         db=DB.TiDB,
+         db_config=TiDBConfig(
+             db_label=parameters["db_label"],
+             user_name=parameters["username"],
+             password=SecretStr(parameters["password"]),
+             host=parameters["host"],
+             port=parameters["port"],
+             db_name=parameters["db_name"],
+             ssl=parameters["ssl"],
+         ),
+         db_case_config=TiDBIndexConfig(),
+         **parameters,
+     )
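The command above wires TiDB into the existing `vectordbbench` CLI. As a rough sketch of an invocation against a local TiDB instance (assuming click's default command naming, which lowercases the function name to `tidb`, and the `--db-label` option inherited from CommonTypedDict):

    vectordbbench tidb --db-label local-tidb --host 127.0.0.1 --port 4000 --username root --db-name test --no-ssl

For TiDB Serverless, `--ssl` must be passed instead, as the option's help text notes.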
vectordb_bench/backend/clients/tidb/config.py
@@ -0,0 +1,49 @@
+ from pydantic import SecretStr, BaseModel, validator
+ from ..api import DBConfig, DBCaseConfig, MetricType
+
+
+ class TiDBConfig(DBConfig):
+     user_name: str = "root"
+     password: SecretStr
+     host: str = "127.0.0.1"
+     port: int = 4000
+     db_name: str = "test"
+     ssl: bool = False
+
+     @validator("*")
+     def not_empty_field(cls, v: any, field: any):
+         return v
+
+     def to_dict(self) -> dict:
+         pwd_str = self.password.get_secret_value()
+         return {
+             "host": self.host,
+             "port": self.port,
+             "user": self.user_name,
+             "password": pwd_str,
+             "database": self.db_name,
+             "ssl_verify_cert": self.ssl,
+             "ssl_verify_identity": self.ssl,
+         }
+
+
+ class TiDBIndexConfig(BaseModel, DBCaseConfig):
+     metric_type: MetricType | None = None
+
+     def get_metric_fn(self) -> str:
+         if self.metric_type == MetricType.L2:
+             return "vec_l2_distance"
+         elif self.metric_type == MetricType.COSINE:
+             return "vec_cosine_distance"
+         else:
+             raise ValueError(f"Unsupported metric type: {self.metric_type}")
+
+     def index_param(self) -> dict:
+         return {
+             "metric_fn": self.get_metric_fn(),
+         }
+
+     def search_param(self) -> dict:
+         return {
+             "metric_fn": self.get_metric_fn(),
+         }
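For reference, TiDBConfig.to_dict() produces the keyword arguments that are later passed straight to pymysql.connect(). The sketch below spells out that dict for the CLI defaults, plus the metric-function mapping; values are taken from the code above, and DBConfig base fields such as db_label are omitted.

    # Sketch: connection kwargs produced by TiDBConfig.to_dict() for the defaults
    # (host 127.0.0.1, port 4000, user root, empty password, database "test", ssl off).
    conn_kwargs = {
        "host": "127.0.0.1",
        "port": 4000,
        "user": "root",
        "password": "",
        "database": "test",
        "ssl_verify_cert": False,       # both flags track the single `ssl` switch
        "ssl_verify_identity": False,
    }

    # Metric selection in TiDBIndexConfig.get_metric_fn():
    #   MetricType.L2     -> "vec_l2_distance"
    #   MetricType.COSINE -> "vec_cosine_distance"
    #   anything else     -> ValueError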
vectordb_bench/backend/clients/tidb/tidb.py
@@ -0,0 +1,234 @@
+ import concurrent.futures
+ import io
+ import logging
+ import time
+ from contextlib import contextmanager
+ from typing import Any, Optional, Tuple
+
+ import pymysql
+
+ from ..api import VectorDB
+ from .config import TiDBIndexConfig
+
+ log = logging.getLogger(__name__)
+
+
+ class TiDB(VectorDB):
+     def __init__(
+         self,
+         dim: int,
+         db_config: dict,
+         db_case_config: TiDBIndexConfig,
+         collection_name: str = "vector_bench_test",
+         drop_old: bool = False,
+         **kwargs,
+     ):
+         self.name = "TiDB"
+         self.db_config = db_config
+         self.case_config = db_case_config
+         self.table_name = collection_name
+         self.dim = dim
+         self.conn = None  # To be inited by init()
+         self.cursor = None  # To be inited by init()
+
+         self.search_fn = db_case_config.search_param()["metric_fn"]
+
+         if drop_old:
+             self._drop_table()
+             self._create_table()
+
+     @contextmanager
+     def init(self):
+         with self._get_connection() as (conn, cursor):
+             self.conn = conn
+             self.cursor = cursor
+             try:
+                 yield
+             finally:
+                 self.conn = None
+                 self.cursor = None
+
+     @contextmanager
+     def _get_connection(self):
+         with pymysql.connect(**self.db_config) as conn:
+             conn.autocommit = False
+             with conn.cursor() as cursor:
+                 yield conn, cursor
+
+     def _drop_table(self):
+         try:
+             with self._get_connection() as (conn, cursor):
+                 cursor.execute(f"DROP TABLE IF EXISTS {self.table_name}")
+                 conn.commit()
+         except Exception as e:
+             log.warning("Failed to drop table: %s error: %s", self.table_name, e)
+             raise e
+
+     def _create_table(self):
+         try:
+             index_param = self.case_config.index_param()
+             with self._get_connection() as (conn, cursor):
+                 cursor.execute(
+                     f"""
+                     CREATE TABLE {self.table_name} (
+                         id BIGINT PRIMARY KEY,
+                         embedding VECTOR({self.dim}) NOT NULL,
+                         VECTOR INDEX (({index_param["metric_fn"]}(embedding)))
+                     );
+                     """
+                 )
+                 conn.commit()
+         except Exception as e:
+             log.warning("Failed to create table: %s error: %s", self.table_name, e)
+             raise e
+
+     def ready_to_load(self) -> bool:
+         pass
+
+     def optimize(self, data_size: int | None = None) -> None:
+         while True:
+             progress = self._optimize_check_tiflash_replica_progress()
+             if progress != 1:
+                 log.info("Data replication not ready, progress: %d", progress)
+                 time.sleep(2)
+             else:
+                 break
+
+         log.info("Waiting TiFlash to catch up...")
+         self._optimize_wait_tiflash_catch_up()
+
+         log.info("Start compacting TiFlash replica...")
+         self._optimize_compact_tiflash()
+
+         log.info("Waiting index build to finish...")
+         log_reduce_seq = 0
+         while True:
+             pending_rows = self._optimize_get_tiflash_index_pending_rows()
+             if pending_rows > 0:
+                 if log_reduce_seq % 15 == 0:
+                     log.info("Index not fully built, pending rows: %d", pending_rows)
+                 log_reduce_seq += 1
+                 time.sleep(2)
+             else:
+                 break
+
+         log.info("Index build finished successfully.")
+
+     def _optimize_check_tiflash_replica_progress(self):
+         try:
+             database = self.db_config["database"]
+             with self._get_connection() as (_, cursor):
+                 cursor.execute(
+                     f"""
+                     SELECT PROGRESS FROM information_schema.tiflash_replica
+                     WHERE TABLE_SCHEMA = "{database}" AND TABLE_NAME = "{self.table_name}"
+                     """
+                 )
+                 result = cursor.fetchone()
+                 return result[0]
+         except Exception as e:
+             log.warning("Failed to check TiFlash replica progress: %s", e)
+             raise e
+
+     def _optimize_wait_tiflash_catch_up(self):
+         try:
+             with self._get_connection() as (conn, cursor):
+                 cursor.execute('SET @@TIDB_ISOLATION_READ_ENGINES="tidb,tiflash"')
+                 conn.commit()
+                 cursor.execute(f"SELECT COUNT(*) FROM {self.table_name}")
+                 result = cursor.fetchone()
+                 return result[0]
+         except Exception as e:
+             log.warning("Failed to wait TiFlash to catch up: %s", e)
+             raise e
+
+     def _optimize_compact_tiflash(self):
+         try:
+             with self._get_connection() as (conn, cursor):
+                 cursor.execute(f"ALTER TABLE {self.table_name} COMPACT")
+                 conn.commit()
+         except Exception as e:
+             log.warning("Failed to compact table: %s", e)
+             raise e
+
+     def _optimize_get_tiflash_index_pending_rows(self):
+         try:
+             database = self.db_config["database"]
+             with self._get_connection() as (_, cursor):
+                 cursor.execute(
+                     f"""
+                     SELECT SUM(ROWS_STABLE_NOT_INDEXED)
+                     FROM information_schema.tiflash_indexes
+                     WHERE TIDB_DATABASE = "{database}" AND TIDB_TABLE = "{self.table_name}"
+                     """
+                 )
+                 result = cursor.fetchone()
+                 return result[0]
+         except Exception as e:
+             log.warning("Failed to read TiFlash index pending rows: %s", e)
+             raise e
+
+     def _insert_embeddings_serial(
+         self,
+         embeddings: list[list[float]],
+         metadata: list[int],
+         offset: int,
+         size: int,
+     ) -> Exception:
+         try:
+             with self._get_connection() as (conn, cursor):
+                 buf = io.StringIO()
+                 buf.write(f"INSERT INTO {self.table_name} (id, embedding) VALUES ")
+                 for i in range(offset, offset + size):
+                     if i > offset:
+                         buf.write(",")
+                     buf.write(f'({metadata[i]}, "{str(embeddings[i])}")')
+                 cursor.execute(buf.getvalue())
+                 conn.commit()
+         except Exception as e:
+             log.warning("Failed to insert data into table: %s", e)
+             raise e
+
+     def insert_embeddings(
+         self,
+         embeddings: list[list[float]],
+         metadata: list[int],
+         **kwargs: Any,
+     ) -> Tuple[int, Optional[Exception]]:
+         workers = 10
+         # Avoid exceeding MAX_ALLOWED_PACKET (default=64MB)
+         max_batch_size = 64 * 1024 * 1024 // 24 // self.dim
+         batch_size = len(embeddings) // workers
+         if batch_size > max_batch_size:
+             batch_size = max_batch_size
+         with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
+             futures = []
+             for i in range(0, len(embeddings), batch_size):
+                 offset = i
+                 size = min(batch_size, len(embeddings) - i)
+                 future = executor.submit(self._insert_embeddings_serial, embeddings, metadata, offset, size)
+                 futures.append(future)
+             done, pending = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_EXCEPTION)
+             executor.shutdown(wait=False)
+             for future in done:
+                 future.result()
+             for future in pending:
+                 future.cancel()
+         return len(metadata), None
+
+     def search_embedding(
+         self,
+         query: list[float],
+         k: int = 100,
+         filters: dict | None = None,
+         timeout: int | None = None,
+         **kwargs: Any,
+     ) -> list[int]:
+         self.cursor.execute(
+             f"""
+             SELECT id FROM {self.table_name}
+             ORDER BY {self.search_fn}(embedding, "{str(query)}") LIMIT {k};
+             """
+         )
+         result = self.cursor.fetchall()
+         return [int(i[0]) for i in result]
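To make the client's behaviour concrete, the sketch below spells out the SQL generated by the f-strings above and the insert-batch arithmetic, assuming dim=768, a cosine metric (so metric_fn == "vec_cosine_distance"), and the default table name; the literal query vector is abbreviated.

    # Sketch of the statements issued by the TiDB client above (assumed dim=768,
    # cosine metric, default table name). For illustration only, not executed here.
    create_table_sql = """
    CREATE TABLE vector_bench_test (
        id BIGINT PRIMARY KEY,
        embedding VECTOR(768) NOT NULL,
        VECTOR INDEX ((vec_cosine_distance(embedding)))
    );
    """

    # search_embedding() interpolates the query vector's Python list repr directly:
    search_sql = (
        "SELECT id FROM vector_bench_test "
        'ORDER BY vec_cosine_distance(embedding, "[0.1, 0.2, ...]") LIMIT 100;'
    )

    # insert_embeddings() caps each worker's multi-row INSERT so its text stays
    # under MySQL's default 64 MiB max_allowed_packet, budgeting ~24 bytes per
    # serialized float:
    dim = 768
    max_batch_size = 64 * 1024 * 1024 // 24 // dim   # == 3640 rows per statement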
vectordb_bench/cli/vectordbbench.py
@@ -1,5 +1,6 @@
  from ..backend.clients.alloydb.cli import AlloyDBScaNN
  from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
+ from ..backend.clients.mariadb.cli import MariaDBHNSW
  from ..backend.clients.memorydb.cli import MemoryDB
  from ..backend.clients.milvus.cli import MilvusAutoIndex
  from ..backend.clients.pgdiskann.cli import PgDiskAnn
@@ -10,6 +11,7 @@ from ..backend.clients.redis.cli import Redis
  from ..backend.clients.test.cli import Test
  from ..backend.clients.weaviate_cloud.cli import Weaviate
  from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
+ from ..backend.clients.tidb.cli import TiDB
  from .cli import cli

  cli.add_command(PgVectorHNSW)
@@ -25,6 +27,8 @@ cli.add_command(AWSOpenSearch)
  cli.add_command(PgVectorScaleDiskAnn)
  cli.add_command(PgDiskAnn)
  cli.add_command(AlloyDBScaNN)
+ cli.add_command(MariaDBHNSW)
+ cli.add_command(TiDB)


  if __name__ == "__main__":
vectordb_bench/frontend/components/custom/displaypPrams.py
@@ -3,7 +3,7 @@ def displayParams(st):
      """
      - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
        - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
-       - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
+       - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
        - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.

      - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
@@ -11,3 +11,14 @@ def displayParams(st):
      - `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
      """
  )
+ st.caption(
+     """We recommend limiting the number of test query vectors, like 1,000.""",
+     help="""
+     When conducting concurrent query tests, Vdbbench creates a large number of processes.
+     To minimize additional communication overhead during testing,
+     we prepare a complete set of test queries for each process, allowing them to run independently.\n
+     However, this means that as the number of concurrent processes increases,
+     the number of copied query vectors also increases significantly,
+     which can place substantial pressure on memory resources.
+     """,
+ )
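The caption added above warns that each concurrent worker process holds its own copy of every test query vector. A back-of-the-envelope illustration follows; the dimension, dtype, and process count are assumptions for the example, not VectorDBBench defaults.

    # Rough memory estimate for duplicated query vectors across worker processes.
    num_queries = 10_000         # rows in test.parquet
    dim = 768                    # embedding dimension (assumed)
    bytes_per_value = 4          # float32
    processes = 80               # a high concurrency level under test

    per_process = num_queries * dim * bytes_per_value   # 30,720,000 bytes ≈ 29.3 MiB
    total = per_process * processes                     # ≈ 2.3 GiB just for query copies
    print(f"{per_process / 2**20:.1f} MiB per process, {total / 2**30:.1f} GiB total")

Trimming the query set to around 1,000 vectors cuts this by an order of magnitude, which is the motivation for the recommendation.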
vectordb_bench/frontend/components/run_test/submitTask.py
@@ -1,6 +1,8 @@
  from datetime import datetime
+ from vectordb_bench import config
  from vectordb_bench.frontend.config import styles
  from vectordb_bench.interface import benchmark_runner
+ from vectordb_bench.models import TaskConfig


  def submitTask(st, tasks, isAllValid):
@@ -47,16 +49,31 @@ def advancedSettings(st):
      k = container[0].number_input("k", min_value=1, value=100, label_visibility="collapsed")
      container[1].caption("K value for number of nearest neighbors to search")

-     return index_already_exists, use_aliyun, k
+     container = st.columns([1, 2])
+     defaultconcurrentInput = ",".join(map(str, config.NUM_CONCURRENCY))
+     concurrentInput = container[0].text_input(
+         "Concurrent Input", value=defaultconcurrentInput, label_visibility="collapsed"
+     )
+     container[1].caption("num of concurrencies for search tests to get max-qps")
+     return index_already_exists, use_aliyun, k, concurrentInput


- def controlPanel(st, tasks, taskLabel, isAllValid):
-     index_already_exists, use_aliyun, k = advancedSettings(st)
+ def controlPanel(st, tasks: list[TaskConfig], taskLabel, isAllValid):
+     index_already_exists, use_aliyun, k, concurrentInput = advancedSettings(st)

      def runHandler():
          benchmark_runner.set_drop_old(not index_already_exists)
+
+         try:
+             concurrentInput_list = [int(item.strip()) for item in concurrentInput.split(",")]
+         except ValueError:
+             st.write("please input correct number")
+             return None
+
          for task in tasks:
              task.case_config.k = k
+             task.case_config.concurrency_search_config.num_concurrency = concurrentInput_list
+
          benchmark_runner.set_download_address(use_aliyun)
          benchmark_runner.run(tasks, taskLabel)

vectordb_bench/frontend/config/dbCaseConfigs.py
@@ -173,6 +173,7 @@ CaseConfigParamInput_IndexType = CaseConfigInput(
              IndexType.GPU_IVF_FLAT.value,
              IndexType.GPU_IVF_PQ.value,
              IndexType.GPU_CAGRA.value,
+             IndexType.GPU_BRUTE_FORCE.value,
          ],
      },
  )
@@ -562,6 +563,7 @@ CaseConfigParamInput_Nlist = CaseConfigInput(
          IndexType.IVFSQ8.value,
          IndexType.GPU_IVF_FLAT.value,
          IndexType.GPU_IVF_PQ.value,
+         IndexType.GPU_BRUTE_FORCE.value,
      ],
  )
@@ -579,6 +581,7 @@ CaseConfigParamInput_Nprobe = CaseConfigInput(
          IndexType.IVFSQ8.value,
          IndexType.GPU_IVF_FLAT.value,
          IndexType.GPU_IVF_PQ.value,
+         IndexType.GPU_BRUTE_FORCE.value,
      ],
  )
@@ -703,6 +706,7 @@ CaseConfigParamInput_cache_dataset_on_device = CaseConfigInput(
          IndexType.GPU_CAGRA.value,
          IndexType.GPU_IVF_PQ.value,
          IndexType.GPU_IVF_FLAT.value,
+         IndexType.GPU_BRUTE_FORCE.value,
      ],
  )
@@ -720,6 +724,7 @@ CaseConfigParamInput_refine_ratio = CaseConfigInput(
          IndexType.GPU_CAGRA.value,
          IndexType.GPU_IVF_PQ.value,
          IndexType.GPU_IVF_FLAT.value,
+         IndexType.GPU_BRUTE_FORCE.value,
      ],
  )
@@ -818,6 +823,19 @@ CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
      ],
  )

+ CaseConfigParamInput_TableQuantizationType_PgVector = CaseConfigInput(
+     label=CaseConfigParamType.tableQuantizationType,
+     inputType=InputType.Option,
+     inputConfig={
+         "options": ["none", "bit", "halfvec"],
+     },
+     isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
+     in [
+         IndexType.HNSW.value,
+         IndexType.IVFFlat.value,
+     ],
+ )
+
  CaseConfigParamInput_max_parallel_workers_PgVectorRS = CaseConfigInput(
      label=CaseConfigParamType.max_parallel_workers,
      displayLabel="Max parallel workers",
@@ -1040,6 +1058,84 @@ CaseConfigParamInput_NumCandidates_AliES = CaseConfigInput(
      },
  )

+ CaseConfigParamInput_IndexType_MariaDB = CaseConfigInput(
+     label=CaseConfigParamType.IndexType,
+     inputHelp="Select Index Type",
+     inputType=InputType.Option,
+     inputConfig={
+         "options": [
+             IndexType.HNSW.value,
+         ],
+     },
+ )
+
+ CaseConfigParamInput_StorageEngine_MariaDB = CaseConfigInput(
+     label=CaseConfigParamType.storage_engine,
+     inputHelp="Select Storage Engine",
+     inputType=InputType.Option,
+     inputConfig={
+         "options": ["InnoDB", "MyISAM"],
+     },
+ )
+
+ CaseConfigParamInput_M_MariaDB = CaseConfigInput(
+     label=CaseConfigParamType.M,
+     inputHelp="M parameter in MHNSW vector indexing",
+     inputType=InputType.Number,
+     inputConfig={
+         "min": 3,
+         "max": 200,
+         "value": 6,
+     },
+     isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
+     == IndexType.HNSW.value,
+ )
+
+ CaseConfigParamInput_EFSearch_MariaDB = CaseConfigInput(
+     label=CaseConfigParamType.ef_search,
+     inputHelp="mhnsw_ef_search",
+     inputType=InputType.Number,
+     inputConfig={
+         "min": 1,
+         "max": 10000,
+         "value": 20,
+     },
+     isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
+     == IndexType.HNSW.value,
+ )
+
+ CaseConfigParamInput_CacheSize_MariaDB = CaseConfigInput(
+     label=CaseConfigParamType.max_cache_size,
+     inputHelp="mhnsw_max_cache_size",
+     inputType=InputType.Number,
+     inputConfig={
+         "min": 1048576,
+         "max": (1 << 53) - 1,
+         "value": 16 * 1024 ** 3,
+     },
+     isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
+     == IndexType.HNSW.value,
+ )
+
+ CaseConfigParamInput_MongoDBQuantizationType = CaseConfigInput(
+     label=CaseConfigParamType.mongodb_quantization_type,
+     inputType=InputType.Option,
+     inputConfig={
+         "options": ["none", "scalar", "binary"],
+     },
+ )
+
+
+ CaseConfigParamInput_MongoDBNumCandidatesRatio = CaseConfigInput(
+     label=CaseConfigParamType.mongodb_num_candidates_ratio,
+     inputType=InputType.Number,
+     inputConfig={
+         "min": 10,
+         "max": 20,
+         "value": 10,
+     },
+ )
+


  MilvusLoadConfig = [
@@ -1113,6 +1209,7 @@ PgVectorLoadingConfig = [
      CaseConfigParamInput_m,
      CaseConfigParamInput_EFConstruction_PgVector,
      CaseConfigParamInput_QuantizationType_PgVector,
+     CaseConfigParamInput_TableQuantizationType_PgVector,
      CaseConfigParamInput_maintenance_work_mem_PgVector,
      CaseConfigParamInput_max_parallel_workers_PgVector,
  ]
@@ -1124,6 +1221,7 @@ PgVectorPerformanceConfig = [
      CaseConfigParamInput_Lists_PgVector,
      CaseConfigParamInput_Probes_PgVector,
      CaseConfigParamInput_QuantizationType_PgVector,
+     CaseConfigParamInput_TableQuantizationType_PgVector,
      CaseConfigParamInput_maintenance_work_mem_PgVector,
      CaseConfigParamInput_max_parallel_workers_PgVector,
      CaseConfigParamInput_reranking_PgVector,
@@ -1224,6 +1322,28 @@ AliyunElasticsearchPerformanceConfig = [
      CaseConfigParamInput_NumCandidates_AliES,
  ]

+ MongoDBLoadingConfig = [
+     CaseConfigParamInput_MongoDBQuantizationType,
+ ]
+ MongoDBPerformanceConfig = [
+     CaseConfigParamInput_MongoDBQuantizationType,
+     CaseConfigParamInput_MongoDBNumCandidatesRatio,
+ ]
+
+ MariaDBLoadingConfig = [
+     CaseConfigParamInput_IndexType_MariaDB,
+     CaseConfigParamInput_StorageEngine_MariaDB,
+     CaseConfigParamInput_M_MariaDB,
+     CaseConfigParamInput_CacheSize_MariaDB,
+ ]
+ MariaDBPerformanceConfig = [
+     CaseConfigParamInput_IndexType_MariaDB,
+     CaseConfigParamInput_StorageEngine_MariaDB,
+     CaseConfigParamInput_M_MariaDB,
+     CaseConfigParamInput_CacheSize_MariaDB,
+     CaseConfigParamInput_EFSearch_MariaDB,
+ ]
+
  CASE_CONFIG_MAP = {
      DB.Milvus: {
          CaseLabel.Load: MilvusLoadConfig,
@@ -1272,4 +1392,12 @@ CASE_CONFIG_MAP = {
          CaseLabel.Load: AliyunOpensearchLoadingConfig,
          CaseLabel.Performance: AliyunOpenSearchPerformanceConfig,
      },
+     DB.MongoDB: {
+         CaseLabel.Load: MongoDBLoadingConfig,
+         CaseLabel.Performance: MongoDBPerformanceConfig,
+     },
+     DB.MariaDB: {
+         CaseLabel.Load: MariaDBLoadingConfig,
+         CaseLabel.Performance: MariaDBPerformanceConfig,
+     },
  }
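Two details worth calling out in the new widgets above: the MariaDB mhnsw_max_cache_size default is written as an arithmetic literal, and the new CASE_CONFIG_MAP entries are what the frontend consults when rendering per-database options. A small sketch (the lookup lines illustrate how the map is keyed, they are not quoted from the frontend code):

    # mhnsw_max_cache_size default from CaseConfigParamInput_CacheSize_MariaDB:
    default_cache = 16 * 1024 ** 3        # 17,179,869,184 bytes == 16 GiB
    max_cache = (1 << 53) - 1             # upper bound allowed by the widget

    # CASE_CONFIG_MAP is keyed by DB and then CaseLabel, e.g. (illustrative):
    # CASE_CONFIG_MAP[DB.MariaDB][CaseLabel.Performance] -> MariaDBPerformanceConfig
    # CASE_CONFIG_MAP[DB.MongoDB][CaseLabel.Load]        -> MongoDBLoadingConfig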
vectordb_bench/frontend/config/styles.py
@@ -47,6 +47,7 @@ DB_TO_ICON = {
      DB.Redis: "https://assets.zilliz.com/Redis_Cloud_74b8bfef39.png",
      DB.Chroma: "https://assets.zilliz.com/chroma_ceb3f06ed7.png",
      DB.AWSOpenSearch: "https://assets.zilliz.com/opensearch_1eee37584e.jpeg",
+     DB.TiDB: "https://img2.pingcap.com/forms/3/d/3d7fd5f9767323d6f037795704211ac44b4923d6.png",
  }

  # RedisCloud color: #0D6EFD
@@ -61,4 +62,5 @@ COLOR_MAP = {
      DB.PgVector.value: "#4C779A",
      DB.Redis.value: "#0D6EFD",
      DB.AWSOpenSearch.value: "#0DCAF0",
+     DB.TiDB.value: "#0D6EFD",
  }