vectordb-bench 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +55 -45
- vectordb_bench/backend/clients/__init__.py +85 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +112 -77
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
- vectordb_bench/backend/clients/alloydb/alloydb.py +59 -84
- vectordb_bench/backend/clients/alloydb/cli.py +51 -34
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +13 -24
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +50 -54
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +39 -40
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +24 -26
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +67 -58
- vectordb_bench/backend/clients/milvus/cli.py +41 -83
- vectordb_bench/backend/clients/milvus/config.py +18 -8
- vectordb_bench/backend/clients/milvus/milvus.py +19 -39
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +56 -77
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +34 -43
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +98 -104
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +39 -49
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +15 -25
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +41 -35
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +95 -62
- vectordb_bench/backend/clients/test/cli.py +2 -3
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +5 -9
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +37 -26
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +18 -14
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +56 -23
- vectordb_bench/backend/runner/rate_runner.py +30 -19
- vectordb_bench/backend/runner/read_write_runner.py +46 -22
- vectordb_bench/backend/runner/serial_runner.py +81 -46
- vectordb_bench/backend/runner/util.py +4 -3
- vectordb_bench/backend/task_runner.py +92 -92
- vectordb_bench/backend/utils.py +17 -10
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +45 -24
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/METADATA +22 -15
- vectordb_bench-0.0.21.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.19.dist-info/RECORD +0 -135
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,21 @@
|
|
1
|
+
from functools import partial
|
1
2
|
import streamlit as st
|
2
3
|
from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
|
3
|
-
from vectordb_bench.frontend.components.custom.displayCustomCase import
|
4
|
+
from vectordb_bench.frontend.components.custom.displayCustomCase import (
|
5
|
+
displayCustomCase,
|
6
|
+
)
|
4
7
|
from vectordb_bench.frontend.components.custom.displaypPrams import displayParams
|
5
|
-
from vectordb_bench.frontend.components.custom.getCustomConfig import
|
8
|
+
from vectordb_bench.frontend.components.custom.getCustomConfig import (
|
9
|
+
CustomCaseConfig,
|
10
|
+
generate_custom_case,
|
11
|
+
get_custom_configs,
|
12
|
+
save_custom_configs,
|
13
|
+
)
|
6
14
|
from vectordb_bench.frontend.components.custom.initStyle import initStyle
|
7
15
|
from vectordb_bench.frontend.config.styles import FAVICON, PAGE_TITLE
|
8
16
|
|
9
17
|
|
10
|
-
class CustomCaseManager
|
18
|
+
class CustomCaseManager:
|
11
19
|
customCaseItems: list[CustomCaseConfig]
|
12
20
|
|
13
21
|
def __init__(self):
|
@@ -52,12 +60,25 @@ def main():
|
|
52
60
|
|
53
61
|
columns = expander.columns(8)
|
54
62
|
columns[0].button(
|
55
|
-
"Save",
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
63
|
+
"Save",
|
64
|
+
key=f"{key}_",
|
65
|
+
type="secondary",
|
66
|
+
on_click=lambda: customCaseManager.save(),
|
67
|
+
)
|
68
|
+
columns[1].button(
|
69
|
+
":red[Delete]",
|
70
|
+
key=f"{key}_delete",
|
71
|
+
type="secondary",
|
72
|
+
# B023
|
73
|
+
on_click=partial(lambda idx: customCaseManager.deleteCase(idx), idx=idx),
|
74
|
+
)
|
75
|
+
|
76
|
+
st.button(
|
77
|
+
"\+ New Dataset",
|
78
|
+
key="add_custom_configs",
|
79
|
+
type="primary",
|
80
|
+
on_click=lambda: customCaseManager.addCase(),
|
81
|
+
)
|
61
82
|
|
62
83
|
|
63
84
|
if __name__ == "__main__":
|
@@ -15,8 +15,8 @@ from vectordb_bench.frontend.components.check_results.nav import (
|
|
15
15
|
from vectordb_bench.frontend.components.check_results.charts import drawMetricChart
|
16
16
|
from vectordb_bench.frontend.components.check_results.filters import getshownData
|
17
17
|
from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
|
18
|
-
|
19
|
-
from vectordb_bench.interface import
|
18
|
+
|
19
|
+
from vectordb_bench.interface import benchmark_runner
|
20
20
|
from vectordb_bench.metric import QURIES_PER_DOLLAR_METRIC
|
21
21
|
|
22
22
|
|
@@ -27,7 +27,7 @@ def main():
|
|
27
27
|
# header
|
28
28
|
drawHeaderIcon(st)
|
29
29
|
|
30
|
-
allResults =
|
30
|
+
allResults = benchmark_runner.get_results()
|
31
31
|
|
32
32
|
st.title("Vector DB Benchmark (QP$)")
|
33
33
|
|
@@ -15,10 +15,10 @@ from vectordb_bench.frontend.components.check_results.stPageConfig import initRu
|
|
15
15
|
def main():
|
16
16
|
# set page config
|
17
17
|
initRunTestPageConfig(st)
|
18
|
-
|
18
|
+
|
19
19
|
# init style
|
20
20
|
initStyle(st)
|
21
|
-
|
21
|
+
|
22
22
|
# header
|
23
23
|
drawHeaderIcon(st)
|
24
24
|
|
@@ -48,11 +48,7 @@ def main():
|
|
48
48
|
activedCaseList, allCaseConfigs = caseSelector(caseSelectorContainer, activedDbList)
|
49
49
|
|
50
50
|
# generate tasks
|
51
|
-
tasks = (
|
52
|
-
generate_tasks(activedDbList, dbConfigs, activedCaseList, allCaseConfigs)
|
53
|
-
if isAllValid
|
54
|
-
else []
|
55
|
-
)
|
51
|
+
tasks = generate_tasks(activedDbList, dbConfigs, activedCaseList, allCaseConfigs) if isAllValid else []
|
56
52
|
|
57
53
|
# submit
|
58
54
|
submitContainer = st.container()
|
vectordb_bench/frontend/utils.py
CHANGED
@@ -18,5 +18,5 @@ def addHorizontalLine(st):
|
|
18
18
|
|
19
19
|
def generate_random_string(length):
|
20
20
|
letters = string.ascii_letters + string.digits
|
21
|
-
result =
|
21
|
+
result = "".join(random.choice(letters) for _ in range(length))
|
22
22
|
return result
|
@@ -11,8 +11,8 @@ from vectordb_bench.frontend.components.check_results.nav import (
|
|
11
11
|
from vectordb_bench.frontend.components.check_results.charts import drawCharts
|
12
12
|
from vectordb_bench.frontend.components.check_results.filters import getshownData
|
13
13
|
from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
|
14
|
-
|
15
|
-
from vectordb_bench.interface import
|
14
|
+
|
15
|
+
from vectordb_bench.interface import benchmark_runner
|
16
16
|
|
17
17
|
|
18
18
|
def main():
|
@@ -22,7 +22,7 @@ def main():
|
|
22
22
|
# header
|
23
23
|
drawHeaderIcon(st)
|
24
24
|
|
25
|
-
allResults =
|
25
|
+
allResults = benchmark_runner.get_results()
|
26
26
|
|
27
27
|
st.title("Vector Database Benchmark")
|
28
28
|
st.caption(
|
@@ -32,9 +32,7 @@ def main():
|
|
32
32
|
|
33
33
|
# results selector and filter
|
34
34
|
resultSelectorContainer = st.sidebar.container()
|
35
|
-
shownData, failedTasks, showCaseNames = getshownData(
|
36
|
-
allResults, resultSelectorContainer
|
37
|
-
)
|
35
|
+
shownData, failedTasks, showCaseNames = getshownData(allResults, resultSelectorContainer)
|
38
36
|
|
39
37
|
resultSelectorContainer.divider()
|
40
38
|
|
vectordb_bench/interface.py
CHANGED
@@ -5,6 +5,7 @@ import pathlib
|
|
5
5
|
import signal
|
6
6
|
import traceback
|
7
7
|
import uuid
|
8
|
+
from collections.abc import Callable
|
8
9
|
from enum import Enum
|
9
10
|
from multiprocessing.connection import Connection
|
10
11
|
|
@@ -16,8 +17,15 @@ from .backend.data_source import DatasetSource
|
|
16
17
|
from .backend.result_collector import ResultCollector
|
17
18
|
from .backend.task_runner import TaskRunner
|
18
19
|
from .metric import Metric
|
19
|
-
from .models import (
|
20
|
-
|
20
|
+
from .models import (
|
21
|
+
CaseResult,
|
22
|
+
LoadTimeoutError,
|
23
|
+
PerformanceTimeoutError,
|
24
|
+
ResultLabel,
|
25
|
+
TaskConfig,
|
26
|
+
TaskStage,
|
27
|
+
TestResult,
|
28
|
+
)
|
21
29
|
|
22
30
|
log = logging.getLogger(__name__)
|
23
31
|
|
@@ -37,11 +45,9 @@ class BenchMarkRunner:
|
|
37
45
|
self.drop_old: bool = True
|
38
46
|
self.dataset_source: DatasetSource = DatasetSource.S3
|
39
47
|
|
40
|
-
|
41
48
|
def set_drop_old(self, drop_old: bool):
|
42
49
|
self.drop_old = drop_old
|
43
50
|
|
44
|
-
|
45
51
|
def set_download_address(self, use_aliyun: bool):
|
46
52
|
if use_aliyun:
|
47
53
|
self.dataset_source = DatasetSource.AliyunOSS
|
@@ -70,7 +76,12 @@ class BenchMarkRunner:
|
|
70
76
|
self.latest_error = ""
|
71
77
|
|
72
78
|
try:
|
73
|
-
self.running_task = Assembler.assemble_all(
|
79
|
+
self.running_task = Assembler.assemble_all(
|
80
|
+
run_id,
|
81
|
+
task_label,
|
82
|
+
tasks,
|
83
|
+
self.dataset_source,
|
84
|
+
)
|
74
85
|
self.running_task.display()
|
75
86
|
except ModuleNotFoundError as e:
|
76
87
|
msg = f"Please install client for database, error={e}"
|
@@ -119,7 +130,7 @@ class BenchMarkRunner:
|
|
119
130
|
return 0
|
120
131
|
|
121
132
|
def get_current_task_id(self) -> int:
|
122
|
-
"""
|
133
|
+
"""the index of current running task
|
123
134
|
return -1 if not running
|
124
135
|
"""
|
125
136
|
if not self.running_task:
|
@@ -153,18 +164,17 @@ class BenchMarkRunner:
|
|
153
164
|
task_config=runner.config,
|
154
165
|
)
|
155
166
|
|
156
|
-
# drop_old = False if latest_runner and runner == latest_runner else config.DROP_OLD
|
157
|
-
# drop_old = config.DROP_OLD
|
158
167
|
drop_old = TaskStage.DROP_OLD in runner.config.stages
|
159
|
-
if latest_runner and runner == latest_runner:
|
160
|
-
drop_old = False
|
161
|
-
elif not self.drop_old:
|
168
|
+
if (latest_runner and runner == latest_runner) or not self.drop_old:
|
162
169
|
drop_old = False
|
170
|
+
num_cases = running_task.num_cases()
|
163
171
|
try:
|
164
|
-
log.info(f"[{idx+1}/{
|
172
|
+
log.info(f"[{idx+1}/{num_cases}] start case: {runner.display()}, drop_old={drop_old}")
|
165
173
|
case_res.metrics = runner.run(drop_old)
|
166
|
-
log.info(
|
167
|
-
|
174
|
+
log.info(
|
175
|
+
f"[{idx+1}/{num_cases}] finish case: {runner.display()}, "
|
176
|
+
f"result={case_res.metrics}, label={case_res.label}"
|
177
|
+
)
|
168
178
|
|
169
179
|
# cache the latest succeeded runner
|
170
180
|
latest_runner = runner
|
@@ -176,12 +186,12 @@ class BenchMarkRunner:
|
|
176
186
|
if not drop_old:
|
177
187
|
case_res.metrics.load_duration = cached_load_duration if cached_load_duration else 0.0
|
178
188
|
except (LoadTimeoutError, PerformanceTimeoutError) as e:
|
179
|
-
log.warning(f"[{idx+1}/{
|
189
|
+
log.warning(f"[{idx+1}/{num_cases}] case {runner.display()} failed to run, reason={e}")
|
180
190
|
case_res.label = ResultLabel.OUTOFRANGE
|
181
191
|
continue
|
182
192
|
|
183
193
|
except Exception as e:
|
184
|
-
log.warning(f"[{idx+1}/{
|
194
|
+
log.warning(f"[{idx+1}/{num_cases}] case {runner.display()} failed to run, reason={e}")
|
185
195
|
traceback.print_exc()
|
186
196
|
case_res.label = ResultLabel.FAILED
|
187
197
|
continue
|
@@ -203,7 +213,9 @@ class BenchMarkRunner:
|
|
203
213
|
log.info(f"Success to finish task: label={running_task.task_label}, run_id={running_task.run_id}")
|
204
214
|
|
205
215
|
except Exception as e:
|
206
|
-
err_msg =
|
216
|
+
err_msg = (
|
217
|
+
f"An error occurs when running task={running_task.task_label}, run_id={running_task.run_id}, err={e}"
|
218
|
+
)
|
207
219
|
traceback.print_exc()
|
208
220
|
log.warning(err_msg)
|
209
221
|
send_conn.send((SIGNAL.ERROR, err_msg))
|
@@ -226,16 +238,26 @@ class BenchMarkRunner:
|
|
226
238
|
self.receive_conn.close()
|
227
239
|
self.receive_conn = None
|
228
240
|
|
229
|
-
|
230
241
|
def _run_async(self, conn: Connection) -> bool:
|
231
|
-
log.info(
|
242
|
+
log.info(
|
243
|
+
f"task submitted: id={self.running_task.run_id}, {self.running_task.task_label}, "
|
244
|
+
f"case number: {len(self.running_task.case_runners)}"
|
245
|
+
)
|
232
246
|
global global_result_future
|
233
|
-
executor = concurrent.futures.ProcessPoolExecutor(
|
247
|
+
executor = concurrent.futures.ProcessPoolExecutor(
|
248
|
+
max_workers=1,
|
249
|
+
mp_context=mp.get_context("spawn"),
|
250
|
+
)
|
234
251
|
global_result_future = executor.submit(self._async_task_v2, self.running_task, conn)
|
235
252
|
|
236
253
|
return True
|
237
254
|
|
238
|
-
def kill_proc_tree(
|
255
|
+
def kill_proc_tree(
|
256
|
+
self,
|
257
|
+
sig: int = signal.SIGTERM,
|
258
|
+
timeout: float | None = None,
|
259
|
+
on_terminate: Callable | None = None,
|
260
|
+
):
|
239
261
|
"""Kill a process tree (including grandchildren) with signal
|
240
262
|
"sig" and return a (gone, still_alive) tuple.
|
241
263
|
"on_terminate", if specified, is a callback function which is
|
@@ -248,12 +270,11 @@ class BenchMarkRunner:
|
|
248
270
|
p.send_signal(sig)
|
249
271
|
except psutil.NoSuchProcess:
|
250
272
|
pass
|
251
|
-
gone, alive = psutil.wait_procs(children, timeout=timeout,
|
252
|
-
callback=on_terminate)
|
273
|
+
gone, alive = psutil.wait_procs(children, timeout=timeout, callback=on_terminate)
|
253
274
|
|
254
275
|
for p in alive:
|
255
276
|
log.warning(f"force killing child process: {p}")
|
256
277
|
p.kill()
|
257
278
|
|
258
279
|
|
259
|
-
|
280
|
+
benchmark_runner = BenchMarkRunner()
|
vectordb_bench/log_util.py
CHANGED
@@ -1,102 +1,97 @@
|
|
1
1
|
import logging
|
2
2
|
from logging import config
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
4
|
+
|
5
|
+
def init(log_level: str):
|
6
|
+
log_config = {
|
7
|
+
"version": 1,
|
8
|
+
"disable_existing_loggers": False,
|
9
|
+
"formatters": {
|
10
|
+
"default": {
|
11
|
+
"format": "%(asctime)s | %(levelname)s |%(message)s (%(filename)s:%(lineno)s)",
|
11
12
|
},
|
12
|
-
|
13
|
-
|
14
|
-
|
13
|
+
"colorful_console": {
|
14
|
+
"format": "%(asctime)s | %(levelname)s: %(message)s (%(filename)s:%(lineno)s) (%(process)s)",
|
15
|
+
"()": ColorfulFormatter,
|
15
16
|
},
|
16
17
|
},
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
"handlers": {
|
19
|
+
"console": {
|
20
|
+
"class": "logging.StreamHandler",
|
21
|
+
"formatter": "colorful_console",
|
21
22
|
},
|
22
|
-
|
23
|
-
|
24
|
-
|
23
|
+
"no_color_console": {
|
24
|
+
"class": "logging.StreamHandler",
|
25
|
+
"formatter": "default",
|
25
26
|
},
|
26
27
|
},
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
28
|
+
"loggers": {
|
29
|
+
"vectordb_bench": {
|
30
|
+
"handlers": ["console"],
|
31
|
+
"level": log_level,
|
32
|
+
"propagate": False,
|
32
33
|
},
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
34
|
+
"no_color": {
|
35
|
+
"handlers": ["no_color_console"],
|
36
|
+
"level": log_level,
|
37
|
+
"propagate": False,
|
37
38
|
},
|
38
39
|
},
|
39
|
-
|
40
|
+
"propagate": False,
|
40
41
|
}
|
41
42
|
|
42
|
-
config.dictConfig(
|
43
|
+
config.dictConfig(log_config)
|
43
44
|
|
44
|
-
class colors:
|
45
|
-
HEADER= '\033[95m'
|
46
|
-
INFO= '\033[92m'
|
47
|
-
DEBUG= '\033[94m'
|
48
|
-
WARNING= '\033[93m'
|
49
|
-
ERROR= '\033[95m'
|
50
|
-
CRITICAL= '\033[91m'
|
51
|
-
ENDC= '\033[0m'
|
52
45
|
|
46
|
+
class colors:
|
47
|
+
HEADER = "\033[95m"
|
48
|
+
INFO = "\033[92m"
|
49
|
+
DEBUG = "\033[94m"
|
50
|
+
WARNING = "\033[93m"
|
51
|
+
ERROR = "\033[95m"
|
52
|
+
CRITICAL = "\033[91m"
|
53
|
+
ENDC = "\033[0m"
|
53
54
|
|
54
55
|
|
55
56
|
COLORS = {
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
57
|
+
"INFO": colors.INFO,
|
58
|
+
"INFOM": colors.INFO,
|
59
|
+
"DEBUG": colors.DEBUG,
|
60
|
+
"DEBUGM": colors.DEBUG,
|
61
|
+
"WARNING": colors.WARNING,
|
62
|
+
"WARNINGM": colors.WARNING,
|
63
|
+
"CRITICAL": colors.CRITICAL,
|
64
|
+
"CRITICALM": colors.CRITICAL,
|
65
|
+
"ERROR": colors.ERROR,
|
66
|
+
"ERRORM": colors.ERROR,
|
67
|
+
"ENDC": colors.ENDC,
|
67
68
|
}
|
68
69
|
|
69
70
|
|
70
71
|
class ColorFulFormatColMixin:
|
71
|
-
def format_col(self,
|
72
|
-
if level_name in COLORS
|
73
|
-
|
74
|
-
return
|
75
|
-
|
76
|
-
def formatTime(self, record, datefmt=None):
|
77
|
-
ret = super().formatTime(record, datefmt)
|
78
|
-
return ret
|
72
|
+
def format_col(self, message: str, level_name: str):
|
73
|
+
if level_name in COLORS:
|
74
|
+
message = COLORS[level_name] + message + COLORS["ENDC"]
|
75
|
+
return message
|
79
76
|
|
80
77
|
|
81
78
|
class ColorfulLogRecordProxy(logging.LogRecord):
|
82
|
-
def __init__(self, record):
|
79
|
+
def __init__(self, record: any):
|
83
80
|
self._record = record
|
84
|
-
msg_level = record.levelname +
|
81
|
+
msg_level = record.levelname + "M"
|
85
82
|
self.msg = f"{COLORS[msg_level]}{record.msg}{COLORS['ENDC']}"
|
86
83
|
self.filename = record.filename
|
87
|
-
self.lineno = f
|
88
|
-
self.process = f
|
84
|
+
self.lineno = f"{record.lineno}"
|
85
|
+
self.process = f"{record.process}"
|
89
86
|
self.levelname = f"{COLORS[record.levelname]}{record.levelname}{COLORS['ENDC']}"
|
90
87
|
|
91
|
-
def __getattr__(self, attr):
|
88
|
+
def __getattr__(self, attr: any):
|
92
89
|
if attr not in self.__dict__:
|
93
90
|
return getattr(self._record, attr)
|
94
91
|
return getattr(self, attr)
|
95
92
|
|
96
93
|
|
97
94
|
class ColorfulFormatter(ColorFulFormatColMixin, logging.Formatter):
|
98
|
-
def format(self, record):
|
95
|
+
def format(self, record: any):
|
99
96
|
proxy = ColorfulLogRecordProxy(record)
|
100
|
-
|
101
|
-
|
102
|
-
return message_str
|
97
|
+
return super().format(proxy)
|
vectordb_bench/metric.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
import logging
|
2
|
-
import numpy as np
|
3
|
-
|
4
2
|
from dataclasses import dataclass, field
|
5
3
|
|
4
|
+
import numpy as np
|
6
5
|
|
7
6
|
log = logging.getLogger(__name__)
|
8
7
|
|
@@ -33,19 +32,19 @@ MAX_LOAD_COUNT_METRIC = "max_load_count"
|
|
33
32
|
QPS_METRIC = "qps"
|
34
33
|
RECALL_METRIC = "recall"
|
35
34
|
|
36
|
-
|
35
|
+
metric_unit_map = {
|
37
36
|
LOAD_DURATION_METRIC: "s",
|
38
37
|
SERIAL_LATENCY_P99_METRIC: "ms",
|
39
38
|
MAX_LOAD_COUNT_METRIC: "K",
|
40
39
|
QURIES_PER_DOLLAR_METRIC: "K",
|
41
40
|
}
|
42
41
|
|
43
|
-
|
42
|
+
lower_is_better_metrics = [
|
44
43
|
LOAD_DURATION_METRIC,
|
45
44
|
SERIAL_LATENCY_P99_METRIC,
|
46
45
|
]
|
47
46
|
|
48
|
-
|
47
|
+
metric_order = [
|
49
48
|
QPS_METRIC,
|
50
49
|
RECALL_METRIC,
|
51
50
|
LOAD_DURATION_METRIC,
|
@@ -55,7 +54,7 @@ metricOrder = [
|
|
55
54
|
|
56
55
|
|
57
56
|
def isLowerIsBetterMetric(metric: str) -> bool:
|
58
|
-
return metric in
|
57
|
+
return metric in lower_is_better_metrics
|
59
58
|
|
60
59
|
|
61
60
|
def calc_recall(count: int, ground_truth: list[int], got: list[int]) -> float:
|
@@ -70,7 +69,7 @@ def calc_recall(count: int, ground_truth: list[int], got: list[int]) -> float:
|
|
70
69
|
def get_ideal_dcg(k: int):
|
71
70
|
ideal_dcg = 0
|
72
71
|
for i in range(k):
|
73
|
-
ideal_dcg += 1 / np.log2(i+2)
|
72
|
+
ideal_dcg += 1 / np.log2(i + 2)
|
74
73
|
|
75
74
|
return ideal_dcg
|
76
75
|
|
@@ -78,8 +77,8 @@ def get_ideal_dcg(k: int):
|
|
78
77
|
def calc_ndcg(ground_truth: list[int], got: list[int], ideal_dcg: float) -> float:
|
79
78
|
dcg = 0
|
80
79
|
ground_truth = list(ground_truth)
|
81
|
-
for
|
82
|
-
if
|
83
|
-
idx = ground_truth.index(
|
84
|
-
dcg += 1 / np.log2(idx+2)
|
80
|
+
for got_id in set(got):
|
81
|
+
if got_id in ground_truth:
|
82
|
+
idx = ground_truth.index(got_id)
|
83
|
+
dcg += 1 / np.log2(idx + 2)
|
85
84
|
return dcg / ideal_dcg
|