vectordb-bench 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +14 -27
- vectordb_bench/backend/assembler.py +19 -6
- vectordb_bench/backend/cases.py +186 -23
- vectordb_bench/backend/clients/__init__.py +32 -0
- vectordb_bench/backend/clients/api.py +22 -1
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +249 -43
- vectordb_bench/backend/clients/aws_opensearch/cli.py +51 -21
- vectordb_bench/backend/clients/aws_opensearch/config.py +58 -16
- vectordb_bench/backend/clients/chroma/chroma.py +6 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +19 -1
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +133 -45
- vectordb_bench/backend/clients/lancedb/cli.py +62 -8
- vectordb_bench/backend/clients/lancedb/config.py +14 -1
- vectordb_bench/backend/clients/lancedb/lancedb.py +21 -9
- vectordb_bench/backend/clients/memorydb/memorydb.py +2 -2
- vectordb_bench/backend/clients/milvus/cli.py +30 -9
- vectordb_bench/backend/clients/milvus/config.py +3 -0
- vectordb_bench/backend/clients/milvus/milvus.py +81 -23
- vectordb_bench/backend/clients/oceanbase/cli.py +100 -0
- vectordb_bench/backend/clients/oceanbase/config.py +125 -0
- vectordb_bench/backend/clients/oceanbase/oceanbase.py +215 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +39 -25
- vectordb_bench/backend/clients/qdrant_cloud/config.py +59 -3
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +100 -33
- vectordb_bench/backend/clients/qdrant_local/cli.py +60 -0
- vectordb_bench/backend/clients/qdrant_local/config.py +47 -0
- vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +232 -0
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +29 -3
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -0
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +5 -0
- vectordb_bench/backend/dataset.py +143 -27
- vectordb_bench/backend/filter.py +76 -0
- vectordb_bench/backend/runner/__init__.py +3 -3
- vectordb_bench/backend/runner/mp_runner.py +52 -39
- vectordb_bench/backend/runner/rate_runner.py +68 -52
- vectordb_bench/backend/runner/read_write_runner.py +125 -68
- vectordb_bench/backend/runner/serial_runner.py +56 -23
- vectordb_bench/backend/task_runner.py +48 -20
- vectordb_bench/cli/batch_cli.py +121 -0
- vectordb_bench/cli/cli.py +59 -1
- vectordb_bench/cli/vectordbbench.py +7 -0
- vectordb_bench/config-files/batch_sample_config.yml +17 -0
- vectordb_bench/frontend/components/check_results/data.py +16 -11
- vectordb_bench/frontend/components/check_results/filters.py +53 -25
- vectordb_bench/frontend/components/check_results/headerIcon.py +16 -13
- vectordb_bench/frontend/components/check_results/nav.py +20 -0
- vectordb_bench/frontend/components/custom/displayCustomCase.py +43 -8
- vectordb_bench/frontend/components/custom/displaypPrams.py +10 -5
- vectordb_bench/frontend/components/custom/getCustomConfig.py +10 -0
- vectordb_bench/frontend/components/label_filter/charts.py +60 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +48 -52
- vectordb_bench/frontend/components/run_test/dbSelector.py +9 -5
- vectordb_bench/frontend/components/run_test/inputWidget.py +48 -0
- vectordb_bench/frontend/components/run_test/submitTask.py +3 -1
- vectordb_bench/frontend/components/streaming/charts.py +253 -0
- vectordb_bench/frontend/components/streaming/data.py +62 -0
- vectordb_bench/frontend/components/tables/data.py +1 -1
- vectordb_bench/frontend/components/welcome/explainPrams.py +66 -0
- vectordb_bench/frontend/components/welcome/pagestyle.py +106 -0
- vectordb_bench/frontend/components/welcome/welcomePrams.py +147 -0
- vectordb_bench/frontend/config/dbCaseConfigs.py +420 -41
- vectordb_bench/frontend/config/styles.py +32 -2
- vectordb_bench/frontend/pages/concurrent.py +5 -1
- vectordb_bench/frontend/pages/custom.py +4 -0
- vectordb_bench/frontend/pages/label_filter.py +56 -0
- vectordb_bench/frontend/pages/quries_per_dollar.py +5 -1
- vectordb_bench/frontend/pages/results.py +60 -0
- vectordb_bench/frontend/pages/run_test.py +3 -3
- vectordb_bench/frontend/pages/streaming.py +135 -0
- vectordb_bench/frontend/pages/tables.py +4 -0
- vectordb_bench/frontend/vdb_benchmark.py +16 -41
- vectordb_bench/interface.py +6 -2
- vectordb_bench/metric.py +15 -1
- vectordb_bench/models.py +38 -11
- vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +5890 -0
- vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +6138 -0
- vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +7319 -0
- vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +2365 -0
- vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +3556 -0
- vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +6290 -0
- vectordb_bench/results/dbPrices.json +12 -4
- {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/METADATA +131 -32
- {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/RECORD +87 -65
- {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/WHEEL +1 -1
- vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -791
- vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -679
- vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -1352
- {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/top_level.txt +0 -0
vectordb_bench/metric.py
CHANGED
@@ -13,8 +13,12 @@ class Metric:
|
|
13
13
|
# for load cases
|
14
14
|
max_load_count: int = 0
|
15
15
|
|
16
|
+
# for both performance and streaming cases
|
17
|
+
insert_duration: float = 0.0
|
18
|
+
optimize_duration: float = 0.0
|
19
|
+
load_duration: float = 0.0 # insert + optimize
|
20
|
+
|
16
21
|
# for performance cases
|
17
|
-
load_duration: float = 0.0 # duration to load all dataset into DB
|
18
22
|
qps: float = 0.0
|
19
23
|
serial_latency_p99: float = 0.0
|
20
24
|
recall: float = 0.0
|
@@ -24,6 +28,16 @@ class Metric:
|
|
24
28
|
conc_latency_p99_list: list[float] = field(default_factory=list)
|
25
29
|
conc_latency_avg_list: list[float] = field(default_factory=list)
|
26
30
|
|
31
|
+
# for streaming cases
|
32
|
+
st_ideal_insert_duration: int = 0
|
33
|
+
st_search_stage_list: list[int] = field(default_factory=list)
|
34
|
+
st_search_time_list: list[float] = field(default_factory=list)
|
35
|
+
st_max_qps_list_list: list[float] = field(default_factory=list)
|
36
|
+
st_recall_list: list[float] = field(default_factory=list)
|
37
|
+
st_ndcg_list: list[float] = field(default_factory=list)
|
38
|
+
st_serial_latency_p99_list: list[float] = field(default_factory=list)
|
39
|
+
st_conc_failed_rate_list: list[float] = field(default_factory=list)
|
40
|
+
|
27
41
|
|
28
42
|
QURIES_PER_DOLLAR_METRIC = "QP$ (Quries per Dollar)"
|
29
43
|
LOAD_DURATION_METRIC = "load_duration"
|
vectordb_bench/models.py
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
import logging
|
2
2
|
import pathlib
|
3
3
|
from datetime import date, datetime
|
4
|
-
from enum import Enum, StrEnum, auto
|
4
|
+
from enum import Enum, StrEnum
|
5
5
|
from typing import Self
|
6
6
|
|
7
7
|
import ujson
|
8
8
|
|
9
9
|
from . import config
|
10
|
-
from .backend.cases import CaseType
|
10
|
+
from .backend.cases import Case, CaseType
|
11
11
|
from .backend.clients import (
|
12
12
|
DB,
|
13
13
|
DBCaseConfig,
|
@@ -105,10 +105,27 @@ class CaseConfigParamType(Enum):
|
|
105
105
|
num_partitions = "num_partitions"
|
106
106
|
num_sub_vectors = "num_sub_vectors"
|
107
107
|
sample_rate = "sample_rate"
|
108
|
-
|
109
|
-
|
108
|
+
index_thread_qty_during_force_merge = "index_thread_qty_during_force_merge"
|
109
|
+
number_of_indexing_clients = "number_of_indexing_clients"
|
110
|
+
number_of_shards = "number_of_shards"
|
111
|
+
number_of_replicas = "number_of_replicas"
|
112
|
+
index_thread_qty = "index_thread_qty"
|
113
|
+
engine_name = "engine_name"
|
114
|
+
metric_type_name = "metric_type_name"
|
110
115
|
mongodb_quantization_type = "quantization"
|
111
116
|
mongodb_num_candidates_ratio = "num_candidates_ratio"
|
117
|
+
use_partition_key = "use_partition_key"
|
118
|
+
refresh_interval = "refresh_interval"
|
119
|
+
use_rescore = "use_rescore"
|
120
|
+
oversample_ratio = "oversample_ratio"
|
121
|
+
use_routing = "use_routing"
|
122
|
+
|
123
|
+
dataset_with_size_type = "dataset_with_size_type"
|
124
|
+
insert_rate = "insert_rate"
|
125
|
+
search_stages = "search_stages"
|
126
|
+
concurrencies = "concurrencies"
|
127
|
+
optimize_after_write = "optimize_after_write"
|
128
|
+
read_dur_after_write = "read_dur_after_write"
|
112
129
|
|
113
130
|
|
114
131
|
class CustomizedCase(BaseModel):
|
@@ -144,14 +161,22 @@ class CaseConfig(BaseModel):
|
|
144
161
|
def __hash__(self) -> int:
|
145
162
|
return hash(self.json())
|
146
163
|
|
164
|
+
@property
|
165
|
+
def case(self) -> Case:
|
166
|
+
return self.case_id.case_cls(self.custom_case)
|
167
|
+
|
168
|
+
@property
|
169
|
+
def case_name(self) -> str:
|
170
|
+
return self.case.name
|
171
|
+
|
147
172
|
|
148
173
|
class TaskStage(StrEnum):
|
149
174
|
"""Enumerations of various stages of the task"""
|
150
175
|
|
151
|
-
DROP_OLD = auto()
|
152
|
-
LOAD = auto()
|
153
|
-
SEARCH_SERIAL = auto()
|
154
|
-
SEARCH_CONCURRENT = auto()
|
176
|
+
DROP_OLD = "drop_old"
|
177
|
+
LOAD = "load"
|
178
|
+
SEARCH_SERIAL = "search_serial"
|
179
|
+
SEARCH_CONCURRENT = "search_concurrent"
|
155
180
|
|
156
181
|
def __repr__(self) -> str:
|
157
182
|
return str.__repr__(self.value)
|
@@ -292,12 +317,14 @@ class TestResult(BaseModel):
|
|
292
317
|
key=lambda x: (
|
293
318
|
x.task_config.db.name,
|
294
319
|
x.task_config.db_config.db_label,
|
295
|
-
x.task_config.case_config.case_id.name,
|
320
|
+
x.task_config.case_config.case_name,
|
296
321
|
),
|
297
322
|
reverse=True,
|
298
323
|
)
|
299
324
|
|
300
325
|
filtered_results = [r for r in sorted_results if not filter_list or r.task_config.db not in filter_list]
|
326
|
+
if len(filtered_results) == 0:
|
327
|
+
return
|
301
328
|
|
302
329
|
def append_return(x: any, y: any):
|
303
330
|
x.append(y)
|
@@ -305,7 +332,7 @@ class TestResult(BaseModel):
|
|
305
332
|
|
306
333
|
max_db = max(map(len, [f.task_config.db.name for f in filtered_results]))
|
307
334
|
max_db_labels = max(map(len, [f.task_config.db_config.db_label for f in filtered_results])) + 3
|
308
|
-
max_case = max(map(len, [f.task_config.case_config.case_id.name for f in filtered_results]))
|
335
|
+
max_case = max(map(len, [f.task_config.case_config.case_name for f in filtered_results]))
|
309
336
|
max_load_dur = max(map(len, [str(f.metrics.load_duration) for f in filtered_results])) + 3
|
310
337
|
max_qps = max(map(len, [str(f.metrics.qps) for f in filtered_results])) + 3
|
311
338
|
max_recall = max(map(len, [str(f.metrics.recall) for f in filtered_results])) + 3
|
@@ -359,7 +386,7 @@ class TestResult(BaseModel):
|
|
359
386
|
% (
|
360
387
|
f.task_config.db.name,
|
361
388
|
f.task_config.db_config.db_label,
|
362
|
-
f.task_config.case_config.case_id.name,
|
389
|
+
f.task_config.case_config.case_name,
|
363
390
|
self.task_label,
|
364
391
|
f.metrics.load_duration,
|
365
392
|
f.metrics.qps,
|