vectordb-bench 0.0.19__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. The information is provided for informational purposes only.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +55 -45
- vectordb_bench/backend/clients/__init__.py +75 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +111 -70
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
- vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
- vectordb_bench/backend/clients/alloydb/cli.py +51 -34
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +5 -9
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +38 -36
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
- vectordb_bench/backend/clients/milvus/cli.py +41 -83
- vectordb_bench/backend/clients/milvus/config.py +18 -8
- vectordb_bench/backend/clients/milvus/milvus.py +18 -19
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +94 -58
- vectordb_bench/backend/clients/test/cli.py +1 -2
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +4 -5
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +30 -18
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +85 -34
- vectordb_bench/backend/runner/rate_runner.py +30 -19
- vectordb_bench/backend/runner/read_write_runner.py +51 -23
- vectordb_bench/backend/runner/serial_runner.py +91 -48
- vectordb_bench/backend/runner/util.py +4 -3
- vectordb_bench/backend/task_runner.py +92 -72
- vectordb_bench/backend/utils.py +17 -10
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +56 -26
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/METADATA +22 -15
- vectordb_bench-0.0.20.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.19.dist-info/RECORD +0 -135
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,21 @@
+from functools import partial
 import streamlit as st
 from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
-from vectordb_bench.frontend.components.custom.displayCustomCase import
+from vectordb_bench.frontend.components.custom.displayCustomCase import (
+    displayCustomCase,
+)
 from vectordb_bench.frontend.components.custom.displaypPrams import displayParams
-from vectordb_bench.frontend.components.custom.getCustomConfig import
+from vectordb_bench.frontend.components.custom.getCustomConfig import (
+    CustomCaseConfig,
+    generate_custom_case,
+    get_custom_configs,
+    save_custom_configs,
+)
 from vectordb_bench.frontend.components.custom.initStyle import initStyle
 from vectordb_bench.frontend.config.styles import FAVICON, PAGE_TITLE
 
 
-class CustomCaseManager
+class CustomCaseManager:
     customCaseItems: list[CustomCaseConfig]
 
     def __init__(self):
@@ -52,12 +60,25 @@ def main():
 
     columns = expander.columns(8)
     columns[0].button(
-        "Save",
-
-
-
-
-
+        "Save",
+        key=f"{key}_",
+        type="secondary",
+        on_click=lambda: customCaseManager.save(),
+    )
+    columns[1].button(
+        ":red[Delete]",
+        key=f"{key}_delete",
+        type="secondary",
+        # B023
+        on_click=partial(lambda idx: customCaseManager.deleteCase(idx), idx=idx),
+    )
+
+    st.button(
+        "\+ New Dataset",
+        key="add_custom_configs",
+        type="primary",
+        on_click=lambda: customCaseManager.addCase(),
+    )
 
 
 if __name__ == "__main__":
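The delete handler above goes through `functools.partial` rather than a bare lambda; the `# B023` comment points at the flake8-bugbear rule about closures that capture a loop variable. A minimal, Streamlit-free sketch of that pitfall and the fix (names here are illustrative only):

```python
from functools import partial

# Late binding: every lambda closes over the same loop variable.
late_bound = [lambda: idx for idx in range(3)]
print([f() for f in late_bound])  # [2, 2, 2]

# partial freezes the current value of idx at creation time (the B023-style fix).
bound = [partial(lambda idx: idx, idx=idx) for idx in range(3)]
print([f() for f in bound])  # [0, 1, 2]
```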
@@ -15,8 +15,8 @@ from vectordb_bench.frontend.components.check_results.nav import (
 from vectordb_bench.frontend.components.check_results.charts import drawMetricChart
 from vectordb_bench.frontend.components.check_results.filters import getshownData
 from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
-
-from vectordb_bench.interface import
+
+from vectordb_bench.interface import benchmark_runner
 from vectordb_bench.metric import QURIES_PER_DOLLAR_METRIC
 
 
@@ -27,7 +27,7 @@ def main():
     # header
     drawHeaderIcon(st)
 
-    allResults =
+    allResults = benchmark_runner.get_results()
 
     st.title("Vector DB Benchmark (QP$)")
 
@@ -15,10 +15,10 @@ from vectordb_bench.frontend.components.check_results.stPageConfig import initRu
 def main():
     # set page config
     initRunTestPageConfig(st)
-
+
     # init style
     initStyle(st)
-
+
     # header
     drawHeaderIcon(st)
 
@@ -48,11 +48,7 @@ def main():
     activedCaseList, allCaseConfigs = caseSelector(caseSelectorContainer, activedDbList)
 
     # generate tasks
-    tasks = (
-        generate_tasks(activedDbList, dbConfigs, activedCaseList, allCaseConfigs)
-        if isAllValid
-        else []
-    )
+    tasks = generate_tasks(activedDbList, dbConfigs, activedCaseList, allCaseConfigs) if isAllValid else []
 
     # submit
     submitContainer = st.container()
vectordb_bench/frontend/utils.py CHANGED
@@ -18,5 +18,5 @@ def addHorizontalLine(st):
 
 def generate_random_string(length):
     letters = string.ascii_letters + string.digits
-    result =
+    result = "".join(random.choice(letters) for _ in range(length))
     return result
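For reference, the restored `generate_random_string` draws each character independently from ASCII letters and digits. A self-contained sketch of the same one-liner, with seeding added only to make the demo output reproducible:

```python
import random
import string


def generate_random_string(length: int) -> str:
    # One uniform draw per character from letters and digits.
    letters = string.ascii_letters + string.digits
    return "".join(random.choice(letters) for _ in range(length))


random.seed(0)  # only so the example output is stable
print(generate_random_string(8))  # an 8-character alphanumeric string
```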
@@ -11,8 +11,8 @@ from vectordb_bench.frontend.components.check_results.nav import (
 from vectordb_bench.frontend.components.check_results.charts import drawCharts
 from vectordb_bench.frontend.components.check_results.filters import getshownData
 from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
-
-from vectordb_bench.interface import
+
+from vectordb_bench.interface import benchmark_runner
 
 
 def main():
@@ -22,7 +22,7 @@ def main():
     # header
     drawHeaderIcon(st)
 
-    allResults =
+    allResults = benchmark_runner.get_results()
 
     st.title("Vector Database Benchmark")
     st.caption(
@@ -32,9 +32,7 @@ def main():
 
     # results selector and filter
     resultSelectorContainer = st.sidebar.container()
-    shownData, failedTasks, showCaseNames = getshownData(
-        allResults, resultSelectorContainer
-    )
+    shownData, failedTasks, showCaseNames = getshownData(allResults, resultSelectorContainer)
 
     resultSelectorContainer.divider()
 
vectordb_bench/interface.py CHANGED
@@ -5,6 +5,7 @@ import pathlib
 import signal
 import traceback
 import uuid
+from collections.abc import Callable
 from enum import Enum
 from multiprocessing.connection import Connection
 
@@ -16,8 +17,15 @@ from .backend.data_source import DatasetSource
 from .backend.result_collector import ResultCollector
 from .backend.task_runner import TaskRunner
 from .metric import Metric
-from .models import (
-
+from .models import (
+    CaseResult,
+    LoadTimeoutError,
+    PerformanceTimeoutError,
+    ResultLabel,
+    TaskConfig,
+    TaskStage,
+    TestResult,
+)
 
 log = logging.getLogger(__name__)
 
@@ -37,11 +45,9 @@ class BenchMarkRunner:
         self.drop_old: bool = True
         self.dataset_source: DatasetSource = DatasetSource.S3
 
-
     def set_drop_old(self, drop_old: bool):
         self.drop_old = drop_old
 
-
     def set_download_address(self, use_aliyun: bool):
         if use_aliyun:
             self.dataset_source = DatasetSource.AliyunOSS
@@ -59,7 +65,9 @@ class BenchMarkRunner:
             log.warning("Empty tasks submitted")
             return False
 
-        log.debug(
+        log.debug(
+            f"tasks: {tasks}, task_label: {task_label}, dataset source: {self.dataset_source}",
+        )
 
         # Generate run_id
         run_id = uuid.uuid4().hex
@@ -70,7 +78,12 @@ class BenchMarkRunner:
         self.latest_error = ""
 
         try:
-            self.running_task = Assembler.assemble_all(
+            self.running_task = Assembler.assemble_all(
+                run_id,
+                task_label,
+                tasks,
+                self.dataset_source,
+            )
             self.running_task.display()
         except ModuleNotFoundError as e:
             msg = f"Please install client for database, error={e}"
@@ -119,7 +132,7 @@ class BenchMarkRunner:
         return 0
 
     def get_current_task_id(self) -> int:
-        """
+        """the index of current running task
         return -1 if not running
         """
         if not self.running_task:
@@ -153,18 +166,18 @@ class BenchMarkRunner:
                     task_config=runner.config,
                 )
 
-                # drop_old = False if latest_runner and runner == latest_runner else config.DROP_OLD
-                # drop_old = config.DROP_OLD
                 drop_old = TaskStage.DROP_OLD in runner.config.stages
-                if latest_runner and runner == latest_runner:
-                    drop_old = False
-                elif not self.drop_old:
+                if (latest_runner and runner == latest_runner) or not self.drop_old:
                     drop_old = False
                 try:
-                    log.info(
+                    log.info(
+                        f"[{idx+1}/{running_task.num_cases()}] start case: {runner.display()}, drop_old={drop_old}",
+                    )
                     case_res.metrics = runner.run(drop_old)
-                    log.info(
-
+                    log.info(
+                        f"[{idx+1}/{running_task.num_cases()}] finish case: {runner.display()}, "
+                        f"result={case_res.metrics}, label={case_res.label}",
+                    )
 
                     # cache the latest succeeded runner
                     latest_runner = runner
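The `drop_old` change above folds an `if`/`elif` pair that both assigned `False` into a single `or` condition. A quick brute-force check (with stand-in boolean names) that the two forms agree for every combination of inputs:

```python
from itertools import product

# initial: whether the DROP_OLD stage is configured; same_runner: runner equals the
# cached latest_runner; keep_data: the runner's self.drop_old flag.
for initial, same_runner, keep_data in product([False, True], repeat=3):
    old = initial
    if same_runner:
        old = False
    elif not keep_data:
        old = False

    new = initial
    if same_runner or not keep_data:
        new = False

    assert old == new
print("refactored drop_old condition matches the original on all inputs")
```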
@@ -176,12 +189,16 @@ class BenchMarkRunner:
                     if not drop_old:
                         case_res.metrics.load_duration = cached_load_duration if cached_load_duration else 0.0
                 except (LoadTimeoutError, PerformanceTimeoutError) as e:
-                    log.warning(
+                    log.warning(
+                        f"[{idx+1}/{running_task.num_cases()}] case {runner.display()} failed to run, reason={e}",
+                    )
                     case_res.label = ResultLabel.OUTOFRANGE
                     continue
 
                 except Exception as e:
-                    log.warning(
+                    log.warning(
+                        f"[{idx+1}/{running_task.num_cases()}] case {runner.display()} failed to run, reason={e}",
+                    )
                     traceback.print_exc()
                     case_res.label = ResultLabel.FAILED
                     continue
@@ -200,10 +217,14 @@ class BenchMarkRunner:
 
             send_conn.send((SIGNAL.SUCCESS, None))
             send_conn.close()
-            log.info(
+            log.info(
+                f"Success to finish task: label={running_task.task_label}, run_id={running_task.run_id}",
+            )
 
         except Exception as e:
-            err_msg =
+            err_msg = (
+                f"An error occurs when running task={running_task.task_label}, run_id={running_task.run_id}, err={e}"
+            )
             traceback.print_exc()
             log.warning(err_msg)
             send_conn.send((SIGNAL.ERROR, err_msg))
@@ -226,16 +247,26 @@ class BenchMarkRunner:
             self.receive_conn.close()
             self.receive_conn = None
 
-
     def _run_async(self, conn: Connection) -> bool:
-        log.info(
+        log.info(
+            f"task submitted: id={self.running_task.run_id}, {self.running_task.task_label}, ",
+            f"case number: {len(self.running_task.case_runners)}",
+        )
         global global_result_future
-        executor = concurrent.futures.ProcessPoolExecutor(
+        executor = concurrent.futures.ProcessPoolExecutor(
+            max_workers=1,
+            mp_context=mp.get_context("spawn"),
+        )
         global_result_future = executor.submit(self._async_task_v2, self.running_task, conn)
 
         return True
 
-    def kill_proc_tree(
+    def kill_proc_tree(
+        self,
+        sig: int = signal.SIGTERM,
+        timeout: float | None = None,
+        on_terminate: Callable | None = None,
+    ):
         """Kill a process tree (including grandchildren) with signal
         "sig" and return a (gone, still_alive) tuple.
         "on_terminate", if specified, is a callback function which is
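`_run_async` now spells out the pool arguments: one worker built from a `spawn` multiprocessing context, which starts the worker in a fresh interpreter rather than forking the parent. A minimal standalone sketch of that pattern; the submitted function is a stand-in, not the project's task:

```python
import concurrent.futures
import multiprocessing as mp


def run_case(name: str) -> str:
    # Placeholder for the real benchmark case.
    return f"finished {name}"


if __name__ == "__main__":
    executor = concurrent.futures.ProcessPoolExecutor(
        max_workers=1,                       # one case process at a time
        mp_context=mp.get_context("spawn"),  # clean child interpreter, no inherited state
    )
    future = executor.submit(run_case, "demo-case")
    print(future.result())
    executor.shutdown()
```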
@@ -248,12 +279,11 @@ class BenchMarkRunner:
                 p.send_signal(sig)
             except psutil.NoSuchProcess:
                 pass
-        gone, alive = psutil.wait_procs(children, timeout=timeout,
-                                        callback=on_terminate)
+        gone, alive = psutil.wait_procs(children, timeout=timeout, callback=on_terminate)
 
         for p in alive:
             log.warning(f"force killing child process: {p}")
             p.kill()
 
 
-
+benchmark_runner = BenchMarkRunner()
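`kill_proc_tree` follows the usual psutil recipe: signal every child of a process, wait with `psutil.wait_procs`, then force-kill whatever survives. A condensed standalone version of that recipe (the free-function form here is illustrative; the project keeps it as a method):

```python
import signal

import psutil


def kill_proc_tree(pid: int, sig: int = signal.SIGTERM, timeout: float | None = None):
    """Signal all children of `pid`, wait up to `timeout`, then kill stragglers."""
    children = psutil.Process(pid).children(recursive=True)
    for p in children:
        try:
            p.send_signal(sig)
        except psutil.NoSuchProcess:
            pass  # child already exited
    gone, alive = psutil.wait_procs(children, timeout=timeout)
    for p in alive:
        p.kill()
    return gone, alive
```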
vectordb_bench/log_util.py CHANGED
@@ -1,102 +1,97 @@
 import logging
 from logging import config
 
-
-
-
-
-
-
-
+
+def init(log_level: str):
+    log_config = {
+        "version": 1,
+        "disable_existing_loggers": False,
+        "formatters": {
+            "default": {
+                "format": "%(asctime)s | %(levelname)s |%(message)s (%(filename)s:%(lineno)s)",
             },
-
-
-
+            "colorful_console": {
+                "format": "%(asctime)s | %(levelname)s: %(message)s (%(filename)s:%(lineno)s) (%(process)s)",
+                "()": ColorfulFormatter,
             },
         },
-
-
-
-
+        "handlers": {
+            "console": {
+                "class": "logging.StreamHandler",
+                "formatter": "colorful_console",
             },
-
-
-
+            "no_color_console": {
+                "class": "logging.StreamHandler",
+                "formatter": "default",
             },
         },
-
-
-
-
-
+        "loggers": {
+            "vectordb_bench": {
+                "handlers": ["console"],
+                "level": log_level,
+                "propagate": False,
            },
-
-
-
-
+            "no_color": {
+                "handlers": ["no_color_console"],
+                "level": log_level,
+                "propagate": False,
            },
        },
-
+        "propagate": False,
     }
 
-    config.dictConfig(
+    config.dictConfig(log_config)
 
-class colors:
-    HEADER= '\033[95m'
-    INFO= '\033[92m'
-    DEBUG= '\033[94m'
-    WARNING= '\033[93m'
-    ERROR= '\033[95m'
-    CRITICAL= '\033[91m'
-    ENDC= '\033[0m'
 
+class colors:
+    HEADER = "\033[95m"
+    INFO = "\033[92m"
+    DEBUG = "\033[94m"
+    WARNING = "\033[93m"
+    ERROR = "\033[95m"
+    CRITICAL = "\033[91m"
+    ENDC = "\033[0m"
 
 
 COLORS = {
-
-
-
-
-
-
-
-
-
-
-
+    "INFO": colors.INFO,
+    "INFOM": colors.INFO,
+    "DEBUG": colors.DEBUG,
+    "DEBUGM": colors.DEBUG,
+    "WARNING": colors.WARNING,
+    "WARNINGM": colors.WARNING,
+    "CRITICAL": colors.CRITICAL,
+    "CRITICALM": colors.CRITICAL,
+    "ERROR": colors.ERROR,
+    "ERRORM": colors.ERROR,
+    "ENDC": colors.ENDC,
 }
 
 
 class ColorFulFormatColMixin:
-    def format_col(self,
-        if level_name in COLORS
-
-            return
-
-    def formatTime(self, record, datefmt=None):
-        ret = super().formatTime(record, datefmt)
-        return ret
+    def format_col(self, message: str, level_name: str):
+        if level_name in COLORS:
+            message = COLORS[level_name] + message + COLORS["ENDC"]
+        return message
 
 
 class ColorfulLogRecordProxy(logging.LogRecord):
-    def __init__(self, record):
+    def __init__(self, record: any):
         self._record = record
-        msg_level = record.levelname +
+        msg_level = record.levelname + "M"
         self.msg = f"{COLORS[msg_level]}{record.msg}{COLORS['ENDC']}"
         self.filename = record.filename
-        self.lineno = f
-        self.process = f
+        self.lineno = f"{record.lineno}"
+        self.process = f"{record.process}"
         self.levelname = f"{COLORS[record.levelname]}{record.levelname}{COLORS['ENDC']}"
 
-    def __getattr__(self, attr):
+    def __getattr__(self, attr: any):
         if attr not in self.__dict__:
             return getattr(self._record, attr)
         return getattr(self, attr)
 
 
 class ColorfulFormatter(ColorFulFormatColMixin, logging.Formatter):
-    def format(self, record):
+    def format(self, record: any):
         proxy = ColorfulLogRecordProxy(record)
-
-
-        return message_str
+        return super().format(proxy)
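The rewritten `log_util.init` passes a plain dict to `logging.config.dictConfig`; the `"()"` key on the `colorful_console` formatter is dictConfig's standard hook for building a formatter from a custom factory. A pared-down, self-contained sketch of that mechanism (the toy formatter and logger name are illustrative):

```python
import logging
from logging import config


class ShoutingFormatter(logging.Formatter):
    # Toy custom formatter: upper-cases the message before normal formatting.
    def format(self, record: logging.LogRecord) -> str:
        record.msg = str(record.msg).upper()
        return super().format(record)


config.dictConfig(
    {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "custom": {
                "format": "%(asctime)s | %(levelname)s | %(message)s",
                "()": ShoutingFormatter,  # "()" names the factory used to build this formatter
            },
        },
        "handlers": {
            "console": {"class": "logging.StreamHandler", "formatter": "custom"},
        },
        "loggers": {
            "demo": {"handlers": ["console"], "level": "INFO", "propagate": False},
        },
    }
)
logging.getLogger("demo").info("hello from dictConfig")
```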
vectordb_bench/metric.py CHANGED
@@ -1,8 +1,7 @@
 import logging
-import numpy as np
-
 from dataclasses import dataclass, field
 
+import numpy as np
 
 log = logging.getLogger(__name__)
 
@@ -33,19 +32,19 @@ MAX_LOAD_COUNT_METRIC = "max_load_count"
 QPS_METRIC = "qps"
 RECALL_METRIC = "recall"
 
-
+metric_unit_map = {
     LOAD_DURATION_METRIC: "s",
     SERIAL_LATENCY_P99_METRIC: "ms",
     MAX_LOAD_COUNT_METRIC: "K",
     QURIES_PER_DOLLAR_METRIC: "K",
 }
 
-
+lower_is_better_metrics = [
     LOAD_DURATION_METRIC,
     SERIAL_LATENCY_P99_METRIC,
 ]
 
-
+metric_order = [
     QPS_METRIC,
     RECALL_METRIC,
     LOAD_DURATION_METRIC,
@@ -55,7 +54,7 @@ metricOrder = [
 
 
 def isLowerIsBetterMetric(metric: str) -> bool:
-    return metric in
+    return metric in lower_is_better_metrics
 
 
 def calc_recall(count: int, ground_truth: list[int], got: list[int]) -> float:
@@ -70,7 +69,7 @@ def calc_recall(count: int, ground_truth: list[int], got: list[int]) -> float:
 def get_ideal_dcg(k: int):
     ideal_dcg = 0
     for i in range(k):
-        ideal_dcg += 1 / np.log2(i+2)
+        ideal_dcg += 1 / np.log2(i + 2)
 
     return ideal_dcg
 
78
77
|
def calc_ndcg(ground_truth: list[int], got: list[int], ideal_dcg: float) -> float:
|
79
78
|
dcg = 0
|
80
79
|
ground_truth = list(ground_truth)
|
81
|
-
for
|
82
|
-
if
|
83
|
-
idx = ground_truth.index(
|
84
|
-
dcg += 1 / np.log2(idx+2)
|
80
|
+
for got_id in set(got):
|
81
|
+
if got_id in ground_truth:
|
82
|
+
idx = ground_truth.index(got_id)
|
83
|
+
dcg += 1 / np.log2(idx + 2)
|
85
84
|
return dcg / ideal_dcg
|
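The restored `calc_ndcg` body is a plain DCG over the positions of returned ids in the ground-truth ranking, normalized by the ideal DCG from `get_ideal_dcg`. A small worked example using the same formulas (the id values are made up for illustration):

```python
import numpy as np


def get_ideal_dcg(k: int) -> float:
    return sum(1 / np.log2(i + 2) for i in range(k))


def calc_ndcg(ground_truth: list[int], got: list[int], ideal_dcg: float) -> float:
    dcg = 0.0
    for got_id in set(got):
        if got_id in ground_truth:
            idx = ground_truth.index(got_id)
            dcg += 1 / np.log2(idx + 2)
    return dcg / ideal_dcg


ground_truth = [7, 3, 9]   # ids ranked by true similarity
got = [3, 9, 100]          # ids returned by the database under test
ndcg = calc_ndcg(ground_truth, got, get_ideal_dcg(k=3))
# hits at ground-truth positions 1 and 2: (1/log2(3) + 1/log2(4)) / ideal_dcg ≈ 0.53
print(round(ndcg, 2))
```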