vectordb-bench 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +55 -45
- vectordb_bench/backend/clients/__init__.py +85 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +112 -77
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
- vectordb_bench/backend/clients/alloydb/alloydb.py +59 -84
- vectordb_bench/backend/clients/alloydb/cli.py +51 -34
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +13 -24
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +50 -54
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +39 -40
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +24 -26
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +67 -58
- vectordb_bench/backend/clients/milvus/cli.py +41 -83
- vectordb_bench/backend/clients/milvus/config.py +18 -8
- vectordb_bench/backend/clients/milvus/milvus.py +19 -39
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +56 -77
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +34 -43
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +98 -104
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +39 -49
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +15 -25
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +41 -35
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +95 -62
- vectordb_bench/backend/clients/test/cli.py +2 -3
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +5 -9
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +37 -26
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +18 -14
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +56 -23
- vectordb_bench/backend/runner/rate_runner.py +30 -19
- vectordb_bench/backend/runner/read_write_runner.py +46 -22
- vectordb_bench/backend/runner/serial_runner.py +81 -46
- vectordb_bench/backend/runner/util.py +4 -3
- vectordb_bench/backend/task_runner.py +92 -92
- vectordb_bench/backend/utils.py +17 -10
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +45 -24
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/METADATA +22 -15
- vectordb_bench-0.0.21.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.19.dist-info/RECORD +0 -135
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/top_level.txt +0 -0
vectordb_bench/backend/task_runner.py
CHANGED
@@ -1,24 +1,20 @@
+import concurrent
 import logging
-import psutil
 import traceback
-import concurrent
-import numpy as np
 from enum import Enum, auto
 
-from . import utils
-from .cases import Case, CaseLabel
-from ..base import BaseModel
-from ..models import TaskConfig, PerformanceTimeoutError, TaskStage
+import numpy as np
+import psutil
 
-from .clients import (
-    api,
-    MetricType,
-)
-from ..metric import Metric
-from .runner import MultiProcessingSearchRunner
-from .runner import SerialSearchRunner, SerialInsertRunner
-from .data_source import DatasetSource
+from vectordb_bench.base import BaseModel
+from vectordb_bench.metric import Metric
+from vectordb_bench.models import PerformanceTimeoutError, TaskConfig, TaskStage
+
+from . import utils
+from .cases import Case, CaseLabel
+from .clients import MetricType, api
+from .data_source import DatasetSource
+from .runner import MultiProcessingSearchRunner, SerialInsertRunner, SerialSearchRunner
 
 log = logging.getLogger(__name__)
 
@@ -53,24 +49,39 @@ class CaseRunner(BaseModel):
     search_runner: MultiProcessingSearchRunner | None = None
     final_search_runner: MultiProcessingSearchRunner | None = None
 
-    def __eq__(self, obj):
+    def __eq__(self, obj: any):
         if isinstance(obj, CaseRunner):
-            return self.ca.label == CaseLabel.Performance and \
-                self.config.db == obj.config.db and \
-                self.config.db_case_config == obj.config.db_case_config and \
-                self.ca.dataset == obj.ca.dataset
+            return (
+                self.ca.label == CaseLabel.Performance
+                and self.config.db == obj.config.db
+                and self.config.db_case_config == obj.config.db_case_config
+                and self.ca.dataset == obj.ca.dataset
+            )
         return False
 
     def display(self) -> dict:
-        c_dict = self.ca.dict(include={"label": True, "filters": True, "dataset": {"data": {"name": True, "size": True, "dim": True, "metric_type": True, "label": True}}})
-        c_dict["db"] = self.config.db_name
+        c_dict = self.ca.dict(
+            include={
+                "label": True,
+                "filters": True,
+                "dataset": {
+                    "data": {
+                        "name": True,
+                        "size": True,
+                        "dim": True,
+                        "metric_type": True,
+                        "label": True,
+                    },
+                },
+            },
+        )
+        c_dict["db"] = self.config.db_name
         return c_dict
 
     @property
     def normalize(self) -> bool:
         assert self.db
-        return self.db.need_normalize_cosine() and \
-            self.ca.dataset.data.metric_type == MetricType.COSINE
+        return self.db.need_normalize_cosine() and self.ca.dataset.data.metric_type == MetricType.COSINE
 
     def init_db(self, drop_old: bool = True) -> None:
         db_cls = self.config.db.init_cls
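A note on the normalize property in this hunk: when a client reports need_normalize_cosine() and the dataset metric is COSINE, vectors are L2-normalized so that cosine similarity reduces to inner product. A minimal sketch of that identity, illustrative only and not part of the package:

import numpy as np

def l2_normalize(vectors: np.ndarray) -> np.ndarray:
    # Divide each row vector by its Euclidean norm.
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    return vectors / norms

a = np.array([[3.0, 4.0]])
b = np.array([[6.0, 8.0]])
cosine = (a @ b.T) / (np.linalg.norm(a) * np.linalg.norm(b))
inner = l2_normalize(a) @ l2_normalize(b).T
assert np.allclose(cosine, inner)  # both 1.0 for these parallel vectors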
@@ -80,20 +91,14 @@ class CaseRunner(BaseModel):
             db_config=self.config.db_config.to_dict(),
             db_case_config=self.config.db_case_config,
             drop_old=drop_old,
-        )
-
+        )
 
     def _pre_run(self, drop_old: bool = True):
         try:
             self.init_db(drop_old)
             self.ca.dataset.prepare(self.dataset_source, filters=self.ca.filter_rate)
         except ModuleNotFoundError as e:
-            log.warning(
-                f"pre run case error: please install client for db: {self.config.db}, error={e}"
-            )
-            raise e from None
-        except Exception as e:
-            log.warning(f"pre run case error: {e}")
+            log.warning(f"pre run case error: please install client for db: {self.config.db}, error={e}")
             raise e from None
 
     def run(self, drop_old: bool = True) -> Metric:
@@ -103,12 +108,11 @@ class CaseRunner(BaseModel):
 
         if self.ca.label == CaseLabel.Load:
             return self._run_capacity_case()
-        elif self.ca.label == CaseLabel.Performance:
+        if self.ca.label == CaseLabel.Performance:
             return self._run_perf_case(drop_old)
-        else:
-            msg = f"unknown case type: {self.ca.label}"
-            log.warning(msg)
-            raise ValueError(msg)
+        msg = f"unknown case type: {self.ca.label}"
+        log.warning(msg)
+        raise ValueError(msg)
 
     def _run_capacity_case(self) -> Metric:
         """run capacity cases
@@ -120,16 +124,17 @@ class CaseRunner(BaseModel):
         log.info("Start capacity case")
         try:
             runner = SerialInsertRunner(
-                self.db, self.ca.dataset, self.normalize, self.ca.load_timeout
+                self.db,
+                self.ca.dataset,
+                self.normalize,
+                self.ca.load_timeout,
             )
             count = runner.run_endlessness()
         except Exception as e:
             log.warning(f"Failed to run capacity case, reason = {e}")
             raise e from None
         else:
-            log.info(
-                f"Capacity case loading dataset reaches VectorDB's limit: max capacity = {count}"
-            )
+            log.info(f"Capacity case loading dataset reaches VectorDB's limit: max capacity = {count}")
             return Metric(max_load_count=count)
 
     def _run_perf_case(self, drop_old: bool = True) -> Metric:
@@ -138,29 +143,12 @@ class CaseRunner(BaseModel):
         Returns:
             Metric: load_duration, recall, serial_latency_p99, and, qps
         """
-        '''
-        if drop_old:
-            _, load_dur = self._load_train_data()
-            build_dur = self._optimize()
-            m.load_duration = round(load_dur+build_dur, 4)
-            log.info(
-                f"Finish loading the entire dataset into VectorDB,"
-                f" insert_duration={load_dur}, optimize_duration={build_dur}"
-                f" load_duration(insert + optimize) = {m.load_duration}"
-            )
-
-        self._init_search_runner()
-
-        m.qps, m.conc_num_list, m.conc_qps_list, m.conc_latency_p99_list = self._conc_search()
-        m.recall, m.serial_latency_p99 = self._serial_search()
-        '''
 
         log.info("Start performance case")
         try:
             m = Metric()
             if drop_old:
                 if TaskStage.LOAD in self.config.stages:
-                    # self._load_train_data()
                     _, load_dur = self._load_train_data()
                     build_dur = self._optimize()
                     m.load_duration = round(load_dur + build_dur, 4)
@@ -171,20 +159,23 @@ class CaseRunner(BaseModel):
                     )
                 else:
                     log.info("Data loading skipped")
-            if (
-                TaskStage.SEARCH_SERIAL in self.config.stages
-                or TaskStage.SEARCH_CONCURRENT in self.config.stages
-            ):
+            if TaskStage.SEARCH_SERIAL in self.config.stages or TaskStage.SEARCH_CONCURRENT in self.config.stages:
                 self._init_search_runner()
             if TaskStage.SEARCH_CONCURRENT in self.config.stages:
                 search_results = self._conc_search()
-                m.qps, m.conc_num_list, m.conc_qps_list, m.conc_latency_p99_list = search_results
+                (
+                    m.qps,
+                    m.conc_num_list,
+                    m.conc_qps_list,
+                    m.conc_latency_p99_list,
+                    m.conc_latency_avg_list,
+                ) = search_results
             if TaskStage.SEARCH_SERIAL in self.config.stages:
                 search_results = self._serial_search()
-                '''
+                """
                 m.recall = search_results.recall
                 m.serial_latencies = search_results.serial_latencies
-                '''
+                """
                 m.recall, m.ndcg, m.serial_latency_p99 = search_results
 
         except Exception as e:
@@ -199,7 +190,12 @@ class CaseRunner(BaseModel):
     def _load_train_data(self):
         """Insert train data and get the insert_duration"""
        try:
-            runner = SerialInsertRunner(self.db, self.ca.dataset, self.normalize, self.ca.load_timeout)
+            runner = SerialInsertRunner(
+                self.db,
+                self.ca.dataset,
+                self.normalize,
+                self.ca.load_timeout,
+            )
             runner.run()
         except Exception as e:
             raise e from None
@@ -215,11 +211,12 @@ class CaseRunner(BaseModel):
         """
         try:
             results, _ = self.serial_search_runner.run()
-            return results
         except Exception as e:
-            log.warning(f"search error: {str(e)}, {e}")
+            log.warning(f"search error: {e!s}, {e}")
             self.stop()
-            raise e from None
+            raise e from e
+        else:
+            return results
 
     def _conc_search(self):
         """Performance concurrency tests, search the test data endlessness
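The rewrite above moves the success-path return out of try into an else block (which runs only when the try body raised nothing) and re-raises with `from e`, preserving the exception chain rather than severing it as `from None` does. A minimal standalone sketch of the try/except/else pattern, not package code:

def parse_int(text: str) -> int | None:
    try:
        value = int(text)
    except ValueError:
        return None  # error path handled here
    else:
        return value  # reached only if int() did not raise

assert parse_int("42") == 42
assert parse_int("oops") is None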
@@ -231,26 +228,26 @@ class CaseRunner(BaseModel):
         try:
             return self.search_runner.run()
         except Exception as e:
-            log.warning(f"search error: {str(e)}, {e}")
+            log.warning(f"search error: {e!s}, {e}")
             raise e from None
         finally:
             self.stop()
 
     @utils.time_it
-    def _task(self) -> None:
+    def _optimize_task(self) -> None:
         with self.db.init():
-            self.db.optimize()
+            self.db.optimize(data_size=self.ca.dataset.data.size)
 
     def _optimize(self) -> float:
         with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
-            future = executor.submit(self._task)
+            future = executor.submit(self._optimize_task)
             try:
                 return future.result(timeout=self.ca.optimize_timeout)[1]
             except TimeoutError as e:
                 log.warning(f"VectorDB optimize timeout in {self.ca.optimize_timeout}")
                 for pid, _ in executor._processes.items():
                     psutil.Process(pid).kill()
-                raise PerformanceTimeoutError
+                raise PerformanceTimeoutError from e
             except Exception as e:
                 log.warning(f"VectorDB optimize error: {e}")
                 raise e from None
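The _optimize hunk above uses a common pattern: run a blocking call in a one-worker process pool, wait with a deadline, and hard-kill the worker on timeout, since future.result(timeout=...) alone leaves the child process running. A self-contained sketch of the same pattern (slow_task is a hypothetical stand-in, not from the package):

import concurrent.futures
import time

import psutil


def slow_task() -> str:
    time.sleep(60)  # stand-in for a long optimize/index build
    return "done"


if __name__ == "__main__":
    with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
        future = executor.submit(slow_task)
        try:
            print(future.result(timeout=1))
        except concurrent.futures.TimeoutError:
            # The worker is still alive after the timeout; kill it by pid,
            # mirroring the private-attribute access used in the diff.
            for pid, _ in executor._processes.items():
                psutil.Process(pid).kill()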
@@ -286,6 +283,16 @@ class CaseRunner(BaseModel):
             self.search_runner.stop()
 
 
+DATA_FORMAT = " %-14s | %-12s %-20s %7s | %-10s"
+TITLE_FORMAT = (" %-14s | %-12s %-20s %7s | %-10s") % (
+    "DB",
+    "CaseType",
+    "Dataset",
+    "Filter",
+    "task_label",
+)
+
+
 class TaskRunner(BaseModel):
     run_id: str
     task_label: str
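For illustration, the module-level printf-style formats above render one left-aligned row per case plus a dashed rule; the DB and dataset names below are invented for the example:

DATA_FORMAT = " %-14s | %-12s %-20s %7s | %-10s"
print(TITLE_FORMAT)
print(DATA_FORMAT % ("-" * 11, "-" * 12, "-" * 20, "-" * 7, "-" * 7))
print(DATA_FORMAT % ("Milvus", "Performance", "cohere-1M", "None", "test-run"))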
@@ -304,18 +311,8 @@ class TaskRunner(BaseModel):
         return sum([1 for c in self.case_runners if c.status == status])
 
     def display(self) -> None:
-        DATA_FORMAT = (" %-14s | %-12s %-20s %7s | %-10s")
-        TITLE_FORMAT = (" %-14s | %-12s %-20s %7s | %-10s") % (
-            "DB", "CaseType", "Dataset", "Filter", "task_label")
-
         fmt = [TITLE_FORMAT]
-        fmt.append(DATA_FORMAT%(
-            "-"*11,
-            "-"*12,
-            "-"*20,
-            "-"*7,
-            "-"*7
-        ))
+        fmt.append(DATA_FORMAT % ("-" * 11, "-" * 12, "-" * 20, "-" * 7, "-" * 7))
 
         for f in self.case_runners:
             if f.ca.filter_rate != 0.0:
@@ -326,13 +323,16 @@ class TaskRunner(BaseModel):
                 filters = "None"
 
             ds_str = f"{f.ca.dataset.data.name}-{f.ca.dataset.data.label}-{utils.numerize(f.ca.dataset.data.size)}"
-            fmt.append(DATA_FORMAT%(
-                f.config.db_name,
-                f.ca.label.name,
-                ds_str,
-                filters,
-                self.task_label,
-            ))
+            fmt.append(
+                DATA_FORMAT
+                % (
+                    f.config.db_name,
+                    f.ca.label.name,
+                    ds_str,
+                    filters,
+                    self.task_label,
+                ),
+            )
 
         tmp_logger = logging.getLogger("no_color")
         for f in fmt:
vectordb_bench/backend/utils.py
CHANGED
@@ -2,7 +2,7 @@ import time
 from functools import wraps
 
 
-def numerize(n) -> str:
+def numerize(n: int) -> str:
     """display positive number n for readability
 
     Examples:
@@ -16,32 +16,34 @@ def numerize(n: int) -> str:
         "K": 1e6,
         "M": 1e9,
         "B": 1e12,
-        "END": float('inf'),
+        "END": float("inf"),
     }
 
     display_n, sufix = n, ""
     for s, base in sufix2upbound.items():
         # number >= 1000B will alway have sufix 'B'
         if s == "END":
-            display_n = int(n/1e9)
+            display_n = int(n / 1e9)
             sufix = "B"
             break
 
         if n < base:
             sufix = "" if s == "EMPTY" else s
-            display_n = int(n/(base/1e3))
+            display_n = int(n / (base / 1e3))
             break
     return f"{display_n}{sufix}"
 
 
-def time_it(func):
-    """ returns result and elapsed time"""
+def time_it(func: any):
+    """returns result and elapsed time"""
+
     @wraps(func)
     def inner(*args, **kwargs):
         pref = time.perf_counter()
         result = func(*args, **kwargs)
         delta = time.perf_counter() - pref
         return result, delta
+
     return inner
 
 
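A usage sketch for the time_it decorator above: wrapped calls return a (result, seconds_elapsed) tuple, which is how task_runner.py unpacks `_, load_dur = self._load_train_data()`. The work function here is invented for the example:

import time
from functools import wraps


def time_it(func):
    """returns result and elapsed time"""

    @wraps(func)
    def inner(*args, **kwargs):
        pref = time.perf_counter()
        result = func(*args, **kwargs)
        delta = time.perf_counter() - pref
        return result, delta

    return inner


@time_it
def work(n: int) -> int:
    time.sleep(0.1)
    return n * 2


result, elapsed = work(21)
assert result == 42
assert elapsed >= 0.1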
@@ -62,14 +64,19 @@ def compose_train_files(train_count: int, use_shuffled: bool) -> list[str]:
     return train_files
 
 
-def compose_gt_file(filters: int | float | str | None = None) -> str:
+ONE_PERCENT = 0.01
+NINETY_NINE_PERCENT = 0.99
+
+
+def compose_gt_file(filters: float | str | None = None) -> str:
     if filters is None:
         return "neighbors.parquet"
 
-    if filters == 0.01:
+    if filters == ONE_PERCENT:
         return "neighbors_head_1p.parquet"
 
-    if filters == 0.99:
+    if filters == NINETY_NINE_PERCENT:
         return "neighbors_tail_1p.parquet"
 
-    raise ValueError(f"Filters not supported: {filters}")
+    msg = f"Filters not supported: {filters}"
+    raise ValueError(msg)
vectordb_bench/base.py
CHANGED